From a0e8343b9f82a2a09830c3a47f75ec8264f4e6ef Mon Sep 17 00:00:00 2001 From: DianQK Date: Sat, 9 Sep 2023 15:47:55 +0800 Subject: [PATCH 1/9] [EarlyCSE] Pre-commit offset-based GEP (NFC) --- llvm/test/Transforms/EarlyCSE/gep.ll | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 llvm/test/Transforms/EarlyCSE/gep.ll diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll new file mode 100644 index 0000000000000..838b6dbeff333 --- /dev/null +++ b/llvm/test/Transforms/EarlyCSE/gep.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -S -passes=early-cse -earlycse-debug-hash | FileCheck %s +; RUN: opt < %s -S -passes='early-cse' | FileCheck %s + +%T1 = type { i64, i64, i64 } + +declare void @use_vec(<4 x ptr>); + +define void @foo(ptr %a, <4 x i64> %b, i64 %i) { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: ptr [[A:%.*]], <4 x i64> [[B:%.*]], i64 [[I:%.*]]) { +; CHECK-NEXT: [[S1A:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[S1C:%.*]] = getelementptr [[T1:%.*]], ptr [[A]], i64 0, i32 1 +; CHECK-NEXT: [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7 +; CHECK-NEXT: [[S1E:%.*]] = getelementptr i64, ptr [[A]], i64 1 +; CHECK-NEXT: [[S1F:%.*]] = getelementptr i32, ptr [[A]], i64 2 +; CHECK-NEXT: [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1 +; CHECK-NEXT: [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]] +; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> +; CHECK-NEXT: call void @use_vec(<4 x ptr> [[V]]) +; CHECK-NEXT: [[V2:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> +; CHECK-NEXT: call void @use_vec(<4 x ptr> [[V2]]) +; CHECK-NEXT: ret void +; + %s1a = getelementptr i8, ptr %a, i64 8 + %s1av = load i64, ptr %s1a + %s1b = getelementptr inbounds i8, ptr %a, i64 8 + %s1bv = load i64, ptr %s1b + %s1c = getelementptr %T1, ptr %a, i64 0, i32 1 + %s1cv = load i64, ptr %s1c + %n1d = 
getelementptr i8, ptr %a, i64 7 + %n1dv = load i64, ptr %n1d + %s1e = getelementptr i64, ptr %a, i64 1 + %s1ev = load i64, ptr %s1e + %s1f = getelementptr i32, ptr %a, i64 2 + %s1fv = load i64, ptr %s1f + %n1g = getelementptr i32, ptr %a, i64 1 + %n1gv = load i64, ptr %n1g + %n1h = getelementptr i8, ptr %a, i64 %i + %n1hv = load i64, ptr %n1h + + %v = getelementptr i64, ptr %a, <4 x i64> + call void @use_vec(<4 x ptr> %v) + %v2 = getelementptr i64, ptr %a, <4 x i64> + call void @use_vec(<4 x ptr> %v2) + ret void +} From ac1daad9bb4eb083df6b215c029816d3149e00d8 Mon Sep 17 00:00:00 2001 From: DianQK Date: Sun, 10 Sep 2023 13:14:57 +0800 Subject: [PATCH 2/9] [EarlyCSE] Add a vectorization failure example (NFC) --- .../PhaseOrdering/X86/unroll-vectorizer.ll | 138 ++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll new file mode 100644 index 0000000000000..3072cb39e0133 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt < %s -O3 -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%Zip = type { { ptr, ptr }, { [32 x i8], { i64, i64 } } } + +define void @foo(ptr %a, <32 x i8> %_0) #0 { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: start: +; CHECK-NEXT: [[_34I:%.*]] = getelementptr i8, ptr [[A]], i64 1 +; CHECK-NEXT: [[Z_SROA_0_16_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 0 +; CHECK-NEXT: store i8 [[Z_SROA_0_16_VEC_EXTRACT]], ptr [[A]], align 1 +; CHECK-NEXT: [[_34I_1:%.*]] = 
getelementptr i8, ptr [[A]], i64 2 +; CHECK-NEXT: [[Z_SROA_0_17_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 1 +; CHECK-NEXT: store i8 [[Z_SROA_0_17_VEC_EXTRACT]], ptr [[_34I]], align 1 +; CHECK-NEXT: [[_34I_2:%.*]] = getelementptr i8, ptr [[A]], i64 3 +; CHECK-NEXT: [[Z_SROA_0_18_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 2 +; CHECK-NEXT: store i8 [[Z_SROA_0_18_VEC_EXTRACT]], ptr [[_34I_1]], align 1 +; CHECK-NEXT: [[_34I_3:%.*]] = getelementptr i8, ptr [[A]], i64 4 +; CHECK-NEXT: [[Z_SROA_0_19_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 3 +; CHECK-NEXT: store i8 [[Z_SROA_0_19_VEC_EXTRACT]], ptr [[_34I_2]], align 1 +; CHECK-NEXT: [[_34I_4:%.*]] = getelementptr i8, ptr [[A]], i64 5 +; CHECK-NEXT: [[Z_SROA_0_20_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 4 +; CHECK-NEXT: store i8 [[Z_SROA_0_20_VEC_EXTRACT]], ptr [[_34I_3]], align 1 +; CHECK-NEXT: [[_34I_5:%.*]] = getelementptr i8, ptr [[A]], i64 6 +; CHECK-NEXT: [[Z_SROA_0_21_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 5 +; CHECK-NEXT: store i8 [[Z_SROA_0_21_VEC_EXTRACT]], ptr [[_34I_4]], align 1 +; CHECK-NEXT: [[_34I_6:%.*]] = getelementptr i8, ptr [[A]], i64 7 +; CHECK-NEXT: [[Z_SROA_0_22_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 6 +; CHECK-NEXT: store i8 [[Z_SROA_0_22_VEC_EXTRACT]], ptr [[_34I_5]], align 1 +; CHECK-NEXT: [[_34I_7:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[Z_SROA_0_23_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 7 +; CHECK-NEXT: store i8 [[Z_SROA_0_23_VEC_EXTRACT]], ptr [[_34I_6]], align 1 +; CHECK-NEXT: [[_34I_8:%.*]] = getelementptr i8, ptr [[A]], i64 9 +; CHECK-NEXT: [[Z_SROA_0_24_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 8 +; CHECK-NEXT: store i8 [[Z_SROA_0_24_VEC_EXTRACT]], ptr [[_34I_7]], align 1 +; CHECK-NEXT: [[_34I_9:%.*]] = getelementptr i8, ptr [[A]], i64 10 +; CHECK-NEXT: [[Z_SROA_0_25_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 9 +; CHECK-NEXT: store 
i8 [[Z_SROA_0_25_VEC_EXTRACT]], ptr [[_34I_8]], align 1 +; CHECK-NEXT: [[_34I_10:%.*]] = getelementptr i8, ptr [[A]], i64 11 +; CHECK-NEXT: [[Z_SROA_0_26_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 10 +; CHECK-NEXT: store i8 [[Z_SROA_0_26_VEC_EXTRACT]], ptr [[_34I_9]], align 1 +; CHECK-NEXT: [[_34I_11:%.*]] = getelementptr i8, ptr [[A]], i64 12 +; CHECK-NEXT: [[Z_SROA_0_27_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 11 +; CHECK-NEXT: store i8 [[Z_SROA_0_27_VEC_EXTRACT]], ptr [[_34I_10]], align 1 +; CHECK-NEXT: [[_34I_12:%.*]] = getelementptr i8, ptr [[A]], i64 13 +; CHECK-NEXT: [[Z_SROA_0_28_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 12 +; CHECK-NEXT: store i8 [[Z_SROA_0_28_VEC_EXTRACT]], ptr [[_34I_11]], align 1 +; CHECK-NEXT: [[_34I_13:%.*]] = getelementptr i8, ptr [[A]], i64 14 +; CHECK-NEXT: [[Z_SROA_0_29_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 13 +; CHECK-NEXT: store i8 [[Z_SROA_0_29_VEC_EXTRACT]], ptr [[_34I_12]], align 1 +; CHECK-NEXT: [[_34I_14:%.*]] = getelementptr i8, ptr [[A]], i64 15 +; CHECK-NEXT: [[Z_SROA_0_30_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 14 +; CHECK-NEXT: store i8 [[Z_SROA_0_30_VEC_EXTRACT]], ptr [[_34I_13]], align 1 +; CHECK-NEXT: [[_34I_15:%.*]] = getelementptr i8, ptr [[A]], i64 16 +; CHECK-NEXT: [[Z_SROA_0_31_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 15 +; CHECK-NEXT: store i8 [[Z_SROA_0_31_VEC_EXTRACT]], ptr [[_34I_14]], align 1 +; CHECK-NEXT: [[_34I_16:%.*]] = getelementptr i8, ptr [[A]], i64 17 +; CHECK-NEXT: [[Z_SROA_0_32_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 16 +; CHECK-NEXT: store i8 [[Z_SROA_0_32_VEC_EXTRACT]], ptr [[_34I_15]], align 1 +; CHECK-NEXT: [[_34I_17:%.*]] = getelementptr i8, ptr [[A]], i64 18 +; CHECK-NEXT: [[Z_SROA_0_33_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 17 +; CHECK-NEXT: store i8 [[Z_SROA_0_33_VEC_EXTRACT]], ptr [[_34I_16]], align 1 +; CHECK-NEXT: [[_34I_18:%.*]] = getelementptr i8, ptr 
[[A]], i64 19 +; CHECK-NEXT: [[Z_SROA_0_34_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 18 +; CHECK-NEXT: store i8 [[Z_SROA_0_34_VEC_EXTRACT]], ptr [[_34I_17]], align 1 +; CHECK-NEXT: [[_34I_19:%.*]] = getelementptr i8, ptr [[A]], i64 20 +; CHECK-NEXT: [[Z_SROA_0_35_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 19 +; CHECK-NEXT: store i8 [[Z_SROA_0_35_VEC_EXTRACT]], ptr [[_34I_18]], align 1 +; CHECK-NEXT: [[_34I_20:%.*]] = getelementptr i8, ptr [[A]], i64 21 +; CHECK-NEXT: [[Z_SROA_0_36_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 20 +; CHECK-NEXT: store i8 [[Z_SROA_0_36_VEC_EXTRACT]], ptr [[_34I_19]], align 1 +; CHECK-NEXT: [[_34I_21:%.*]] = getelementptr i8, ptr [[A]], i64 22 +; CHECK-NEXT: [[Z_SROA_0_37_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 21 +; CHECK-NEXT: store i8 [[Z_SROA_0_37_VEC_EXTRACT]], ptr [[_34I_20]], align 1 +; CHECK-NEXT: [[_34I_22:%.*]] = getelementptr i8, ptr [[A]], i64 23 +; CHECK-NEXT: [[Z_SROA_0_38_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 22 +; CHECK-NEXT: store i8 [[Z_SROA_0_38_VEC_EXTRACT]], ptr [[_34I_21]], align 1 +; CHECK-NEXT: [[_34I_23:%.*]] = getelementptr i8, ptr [[A]], i64 24 +; CHECK-NEXT: [[Z_SROA_0_39_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 23 +; CHECK-NEXT: store i8 [[Z_SROA_0_39_VEC_EXTRACT]], ptr [[_34I_22]], align 1 +; CHECK-NEXT: [[_34I_24:%.*]] = getelementptr i8, ptr [[A]], i64 25 +; CHECK-NEXT: [[Z_SROA_0_40_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 24 +; CHECK-NEXT: store i8 [[Z_SROA_0_40_VEC_EXTRACT]], ptr [[_34I_23]], align 1 +; CHECK-NEXT: [[_34I_25:%.*]] = getelementptr i8, ptr [[A]], i64 26 +; CHECK-NEXT: [[Z_SROA_0_41_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 25 +; CHECK-NEXT: store i8 [[Z_SROA_0_41_VEC_EXTRACT]], ptr [[_34I_24]], align 1 +; CHECK-NEXT: [[_34I_26:%.*]] = getelementptr i8, ptr [[A]], i64 27 +; CHECK-NEXT: [[Z_SROA_0_42_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 26 +; 
CHECK-NEXT: store i8 [[Z_SROA_0_42_VEC_EXTRACT]], ptr [[_34I_25]], align 1 +; CHECK-NEXT: [[_34I_27:%.*]] = getelementptr i8, ptr [[A]], i64 28 +; CHECK-NEXT: [[Z_SROA_0_43_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 27 +; CHECK-NEXT: store i8 [[Z_SROA_0_43_VEC_EXTRACT]], ptr [[_34I_26]], align 1 +; CHECK-NEXT: [[_34I_28:%.*]] = getelementptr i8, ptr [[A]], i64 29 +; CHECK-NEXT: [[Z_SROA_0_44_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 28 +; CHECK-NEXT: store i8 [[Z_SROA_0_44_VEC_EXTRACT]], ptr [[_34I_27]], align 1 +; CHECK-NEXT: [[_34I_29:%.*]] = getelementptr i8, ptr [[A]], i64 30 +; CHECK-NEXT: [[Z_SROA_0_45_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 29 +; CHECK-NEXT: store i8 [[Z_SROA_0_45_VEC_EXTRACT]], ptr [[_34I_28]], align 1 +; CHECK-NEXT: [[_34I_30:%.*]] = getelementptr i8, ptr [[A]], i64 31 +; CHECK-NEXT: [[Z_SROA_0_46_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 30 +; CHECK-NEXT: store i8 [[Z_SROA_0_46_VEC_EXTRACT]], ptr [[_34I_29]], align 1 +; CHECK-NEXT: [[Z_SROA_0_47_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 31 +; CHECK-NEXT: store i8 [[Z_SROA_0_47_VEC_EXTRACT]], ptr [[_34I_30]], align 1 +; CHECK-NEXT: ret void +; +start: + %z = alloca %Zip, align 8 + %sroa_1 = getelementptr i8, ptr %z, i64 16 + store <32 x i8> %_0, ptr %sroa_1, align 8 + %len_ = getelementptr i8, ptr %z, i64 56 + store i64 32, ptr %len_, align 8 + %_1 = getelementptr %Zip, ptr %z, i64 0, i32 1, i32 1 + %_2 = getelementptr %Zip, ptr %z, i64 0, i32 1, i32 1, i32 1 + %len = load i64, ptr %_2, align 8 + %_10 = getelementptr %Zip, ptr %z, i64 0, i32 1 + br label %body + +body: ; preds = %body, %start + %_34 = phi ptr [ %_34i, %body ], [ %a, %start ] + %idx = phi i64 [ %idx_, %body ], [ 0, %start ] + %_34i = getelementptr i8, ptr %_34, i64 1 + %idx_ = add i64 %idx, 1 + store i64 0, ptr %_1, align 8 + %_24 = getelementptr i8, ptr %_10, i64 %idx + %_18 = load i8, ptr %_24, align 1 + store i8 %_18, ptr %_34, align 1 + %_6 = 
icmp eq i64 %len, %idx_ + br i1 %_6, label %exit, label %body + +exit: ; preds = %body + ret void +} + +attributes #0 = { "target-cpu"="znver3" } From 31e2ec9d89aeded9ea5da822262449b0c4e8ab16 Mon Sep 17 00:00:00 2001 From: DianQK Date: Sat, 16 Sep 2023 11:48:08 +0800 Subject: [PATCH 3/9] [EarlyCSE] Compare GEP instructions based on offset This will provide more opportunities for constant propagation for subsequent optimizations. --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 153 +++++++++++++++--- llvm/test/Transforms/EarlyCSE/gep.ll | 3 - .../PhaseOrdering/X86/unroll-vectorizer.ll | 96 +---------- 3 files changed, 134 insertions(+), 118 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 439235f47471e..4c69a2f7d75c3 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -143,11 +143,11 @@ struct SimpleValue { !CI->getFunction()->isPresplitCoroutine(); } return isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst); + isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst); } }; @@ -307,10 +307,9 @@ static unsigned getHashValueImpl(SimpleValue Val) { IVI->getOperand(1), hash_combine_range(IVI->idx_begin(), IVI->idx_end())); - assert((isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst)) && + assert((isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst)) && "Invalid/unknown instruction"); // Handle intrinsics with commutative operands. 
@@ -553,6 +552,77 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) { return LHSI->isIdenticalTo(RHSI); } +//===----------------------------------------------------------------------===// +// GEPValue +//===----------------------------------------------------------------------===// + +namespace { + +struct GEPValue { + Instruction *Inst; + APInt ConstantOffset; + bool HasConstantOffset; + + GEPValue(Instruction *I) : Inst(I), HasConstantOffset(false) { + assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); + } + GEPValue(Instruction *I, APInt ConstantOffset, bool HasConstantOffset) + : Inst(I), ConstantOffset(ConstantOffset), + HasConstantOffset(HasConstantOffset) { + assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); + } + + bool isSentinel() const { + return Inst == DenseMapInfo<Instruction *>::getEmptyKey() || + Inst == DenseMapInfo<Instruction *>::getTombstoneKey(); + } + + static bool canHandle(Instruction *Inst) { + return isa<GetElementPtrInst>(Inst); + } +}; + +} // namespace + +namespace llvm { + +template <> struct DenseMapInfo<GEPValue> { + static inline GEPValue getEmptyKey() { + return DenseMapInfo<Instruction *>::getEmptyKey(); + } + + static inline GEPValue getTombstoneKey() { + return DenseMapInfo<Instruction *>::getTombstoneKey(); + } + + static unsigned getHashValue(GEPValue Val); + static bool isEqual(GEPValue LHS, GEPValue RHS); +}; + +} // end namespace llvm + +unsigned DenseMapInfo<GEPValue>::getHashValue(GEPValue Val) { + GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val.Inst); + if (Val.HasConstantOffset) + return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(), + Val.ConstantOffset); + return hash_combine( + GEP->getOpcode(), + hash_combine_range(GEP->value_op_begin(), GEP->value_op_end())); +} + +bool DenseMapInfo<GEPValue>::isEqual(GEPValue LHS, GEPValue RHS) { + if (LHS.isSentinel() || RHS.isSentinel()) + return LHS.Inst == RHS.Inst; + GetElementPtrInst *LGEP = cast<GetElementPtrInst>(LHS.Inst); + GetElementPtrInst *RGEP = cast<GetElementPtrInst>(RHS.Inst); + if (LGEP->getPointerOperand() != RGEP->getPointerOperand()) + return false; + if 
(LHS.HasConstantOffset && RHS.HasConstantOffset) + return LHS.ConstantOffset == RHS.ConstantOffset; + return LGEP->isIdenticalToWhenDefined(RGEP); +} + //===----------------------------------------------------------------------===// // EarlyCSE implementation //===----------------------------------------------------------------------===// @@ -647,6 +717,13 @@ class EarlyCSE { ScopedHashTable>; CallHTType AvailableCalls; + using GEPMapAllocatorTy = + RecyclingAllocator>; + using GEPHTType = ScopedHashTable, + GEPMapAllocatorTy>; + GEPHTType AvailableGEPs; + /// This is the current generation of the memory value. unsigned CurrentGeneration = 0; @@ -667,9 +744,11 @@ class EarlyCSE { class NodeScope { public: NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads, - InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls) - : Scope(AvailableValues), LoadScope(AvailableLoads), - InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {} + InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls, + GEPHTType &AvailableGEPs) + : Scope(AvailableValues), LoadScope(AvailableLoads), + InvariantScope(AvailableInvariants), CallScope(AvailableCalls), + GEPScope(AvailableGEPs) {} NodeScope(const NodeScope &) = delete; NodeScope &operator=(const NodeScope &) = delete; @@ -678,6 +757,7 @@ class EarlyCSE { LoadHTType::ScopeTy LoadScope; InvariantHTType::ScopeTy InvariantScope; CallHTType::ScopeTy CallScope; + GEPHTType::ScopeTy GEPScope; }; // Contains all the needed information to create a stack for doing a depth @@ -688,13 +768,13 @@ class EarlyCSE { public: StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads, InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls, - unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child, + GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n, + DomTreeNode::const_iterator child, DomTreeNode::const_iterator end) : CurrentGeneration(cg), ChildGeneration(cg), Node(n), 
ChildIter(child), EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableInvariants, - AvailableCalls) - {} + AvailableCalls, AvailableGEPs) {} StackNode(const StackNode &) = delete; StackNode &operator=(const StackNode &) = delete; @@ -1561,6 +1641,39 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { continue; } + if (GEPValue::canHandle(&Inst)) { + GetElementPtrInst *GEP = cast<GetElementPtrInst>(&Inst); + APInt Offset(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0); + bool HasConstantOffset = GEP->accumulateConstantOffset(SQ.DL, Offset); + GEPValue GEPVal(GEP, Offset, HasConstantOffset); + if (Value *V = AvailableGEPs.lookup(GEPVal)) { + LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V + << '\n'); + if (auto *I = dyn_cast<Instruction>(V)) { + // If I being poison triggers UB, there is no need to drop those + // flags. Otherwise, only retain flags present on both I and Inst. + // TODO: Currently some fast-math flags are not treated as + // poison-generating even though they should. Until this is fixed, + // always retain flags present on both I and Inst for floating point + // instructions. + if (isa<FPMathOperator>(I) || + (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))) + I->andIRFlags(&Inst); + } + Inst.replaceAllUsesWith(V); + salvageKnowledge(&Inst, &AC); + removeMSSA(Inst); + Inst.eraseFromParent(); + Changed = true; + ++NumCSE; + continue; + } + + // Otherwise, just remember that this value is available. + AvailableGEPs.insert(GEPVal, &Inst); + continue; + } + // A release fence requires that all stores complete before it, but does // not prevent the reordering of following loads 'before' the fence. As a // result, we don't need to consider it as writing to memory and don't need @@ -1675,7 +1788,7 @@ bool EarlyCSE::run() { // Process the root node. 
nodesToProcess.push_back(new StackNode( AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls, - CurrentGeneration, DT.getRootNode(), + AvailableGEPs, CurrentGeneration, DT.getRootNode(), DT.getRootNode()->begin(), DT.getRootNode()->end())); assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it."); @@ -1698,10 +1811,10 @@ bool EarlyCSE::run() { } else if (NodeToProcess->childIter() != NodeToProcess->end()) { // Push the next child onto the stack. DomTreeNode *child = NodeToProcess->nextChild(); - nodesToProcess.push_back( - new StackNode(AvailableValues, AvailableLoads, AvailableInvariants, - AvailableCalls, NodeToProcess->childGeneration(), - child, child->begin(), child->end())); + nodesToProcess.push_back(new StackNode( + AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls, + AvailableGEPs, NodeToProcess->childGeneration(), child, + child->begin(), child->end())); } else { // It has been processed, and there are no more children to process, // so delete it and pop it off the stack. 
diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll index 838b6dbeff333..499b5ac8de0af 100644 --- a/llvm/test/Transforms/EarlyCSE/gep.ll +++ b/llvm/test/Transforms/EarlyCSE/gep.ll @@ -10,10 +10,7 @@ define void @foo(ptr %a, <4 x i64> %b, i64 %i) { ; CHECK-LABEL: define void @foo( ; CHECK-SAME: ptr [[A:%.*]], <4 x i64> [[B:%.*]], i64 [[I:%.*]]) { ; CHECK-NEXT: [[S1A:%.*]] = getelementptr i8, ptr [[A]], i64 8 -; CHECK-NEXT: [[S1C:%.*]] = getelementptr [[T1:%.*]], ptr [[A]], i64 0, i32 1 ; CHECK-NEXT: [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7 -; CHECK-NEXT: [[S1E:%.*]] = getelementptr i64, ptr [[A]], i64 1 -; CHECK-NEXT: [[S1F:%.*]] = getelementptr i32, ptr [[A]], i64 2 ; CHECK-NEXT: [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1 ; CHECK-NEXT: [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]] ; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll index 3072cb39e0133..1c9e7a771ca19 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll @@ -10,101 +10,7 @@ define void @foo(ptr %a, <32 x i8> %_0) #0 { ; CHECK-LABEL: define void @foo( ; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: start: -; CHECK-NEXT: [[_34I:%.*]] = getelementptr i8, ptr [[A]], i64 1 -; CHECK-NEXT: [[Z_SROA_0_16_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 0 -; CHECK-NEXT: store i8 [[Z_SROA_0_16_VEC_EXTRACT]], ptr [[A]], align 1 -; CHECK-NEXT: [[_34I_1:%.*]] = getelementptr i8, ptr [[A]], i64 2 -; CHECK-NEXT: [[Z_SROA_0_17_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 1 -; CHECK-NEXT: store i8 [[Z_SROA_0_17_VEC_EXTRACT]], ptr [[_34I]], align 1 -; CHECK-NEXT: [[_34I_2:%.*]] = getelementptr i8, ptr [[A]], i64 3 -; CHECK-NEXT: 
[[Z_SROA_0_18_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 2 -; CHECK-NEXT: store i8 [[Z_SROA_0_18_VEC_EXTRACT]], ptr [[_34I_1]], align 1 -; CHECK-NEXT: [[_34I_3:%.*]] = getelementptr i8, ptr [[A]], i64 4 -; CHECK-NEXT: [[Z_SROA_0_19_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 3 -; CHECK-NEXT: store i8 [[Z_SROA_0_19_VEC_EXTRACT]], ptr [[_34I_2]], align 1 -; CHECK-NEXT: [[_34I_4:%.*]] = getelementptr i8, ptr [[A]], i64 5 -; CHECK-NEXT: [[Z_SROA_0_20_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 4 -; CHECK-NEXT: store i8 [[Z_SROA_0_20_VEC_EXTRACT]], ptr [[_34I_3]], align 1 -; CHECK-NEXT: [[_34I_5:%.*]] = getelementptr i8, ptr [[A]], i64 6 -; CHECK-NEXT: [[Z_SROA_0_21_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 5 -; CHECK-NEXT: store i8 [[Z_SROA_0_21_VEC_EXTRACT]], ptr [[_34I_4]], align 1 -; CHECK-NEXT: [[_34I_6:%.*]] = getelementptr i8, ptr [[A]], i64 7 -; CHECK-NEXT: [[Z_SROA_0_22_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 6 -; CHECK-NEXT: store i8 [[Z_SROA_0_22_VEC_EXTRACT]], ptr [[_34I_5]], align 1 -; CHECK-NEXT: [[_34I_7:%.*]] = getelementptr i8, ptr [[A]], i64 8 -; CHECK-NEXT: [[Z_SROA_0_23_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 7 -; CHECK-NEXT: store i8 [[Z_SROA_0_23_VEC_EXTRACT]], ptr [[_34I_6]], align 1 -; CHECK-NEXT: [[_34I_8:%.*]] = getelementptr i8, ptr [[A]], i64 9 -; CHECK-NEXT: [[Z_SROA_0_24_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 8 -; CHECK-NEXT: store i8 [[Z_SROA_0_24_VEC_EXTRACT]], ptr [[_34I_7]], align 1 -; CHECK-NEXT: [[_34I_9:%.*]] = getelementptr i8, ptr [[A]], i64 10 -; CHECK-NEXT: [[Z_SROA_0_25_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 9 -; CHECK-NEXT: store i8 [[Z_SROA_0_25_VEC_EXTRACT]], ptr [[_34I_8]], align 1 -; CHECK-NEXT: [[_34I_10:%.*]] = getelementptr i8, ptr [[A]], i64 11 -; CHECK-NEXT: [[Z_SROA_0_26_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 10 -; CHECK-NEXT: store i8 [[Z_SROA_0_26_VEC_EXTRACT]], ptr 
[[_34I_9]], align 1 -; CHECK-NEXT: [[_34I_11:%.*]] = getelementptr i8, ptr [[A]], i64 12 -; CHECK-NEXT: [[Z_SROA_0_27_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 11 -; CHECK-NEXT: store i8 [[Z_SROA_0_27_VEC_EXTRACT]], ptr [[_34I_10]], align 1 -; CHECK-NEXT: [[_34I_12:%.*]] = getelementptr i8, ptr [[A]], i64 13 -; CHECK-NEXT: [[Z_SROA_0_28_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 12 -; CHECK-NEXT: store i8 [[Z_SROA_0_28_VEC_EXTRACT]], ptr [[_34I_11]], align 1 -; CHECK-NEXT: [[_34I_13:%.*]] = getelementptr i8, ptr [[A]], i64 14 -; CHECK-NEXT: [[Z_SROA_0_29_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 13 -; CHECK-NEXT: store i8 [[Z_SROA_0_29_VEC_EXTRACT]], ptr [[_34I_12]], align 1 -; CHECK-NEXT: [[_34I_14:%.*]] = getelementptr i8, ptr [[A]], i64 15 -; CHECK-NEXT: [[Z_SROA_0_30_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 14 -; CHECK-NEXT: store i8 [[Z_SROA_0_30_VEC_EXTRACT]], ptr [[_34I_13]], align 1 -; CHECK-NEXT: [[_34I_15:%.*]] = getelementptr i8, ptr [[A]], i64 16 -; CHECK-NEXT: [[Z_SROA_0_31_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 15 -; CHECK-NEXT: store i8 [[Z_SROA_0_31_VEC_EXTRACT]], ptr [[_34I_14]], align 1 -; CHECK-NEXT: [[_34I_16:%.*]] = getelementptr i8, ptr [[A]], i64 17 -; CHECK-NEXT: [[Z_SROA_0_32_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 16 -; CHECK-NEXT: store i8 [[Z_SROA_0_32_VEC_EXTRACT]], ptr [[_34I_15]], align 1 -; CHECK-NEXT: [[_34I_17:%.*]] = getelementptr i8, ptr [[A]], i64 18 -; CHECK-NEXT: [[Z_SROA_0_33_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 17 -; CHECK-NEXT: store i8 [[Z_SROA_0_33_VEC_EXTRACT]], ptr [[_34I_16]], align 1 -; CHECK-NEXT: [[_34I_18:%.*]] = getelementptr i8, ptr [[A]], i64 19 -; CHECK-NEXT: [[Z_SROA_0_34_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 18 -; CHECK-NEXT: store i8 [[Z_SROA_0_34_VEC_EXTRACT]], ptr [[_34I_17]], align 1 -; CHECK-NEXT: [[_34I_19:%.*]] = getelementptr i8, ptr [[A]], i64 20 -; CHECK-NEXT: 
[[Z_SROA_0_35_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 19 -; CHECK-NEXT: store i8 [[Z_SROA_0_35_VEC_EXTRACT]], ptr [[_34I_18]], align 1 -; CHECK-NEXT: [[_34I_20:%.*]] = getelementptr i8, ptr [[A]], i64 21 -; CHECK-NEXT: [[Z_SROA_0_36_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 20 -; CHECK-NEXT: store i8 [[Z_SROA_0_36_VEC_EXTRACT]], ptr [[_34I_19]], align 1 -; CHECK-NEXT: [[_34I_21:%.*]] = getelementptr i8, ptr [[A]], i64 22 -; CHECK-NEXT: [[Z_SROA_0_37_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 21 -; CHECK-NEXT: store i8 [[Z_SROA_0_37_VEC_EXTRACT]], ptr [[_34I_20]], align 1 -; CHECK-NEXT: [[_34I_22:%.*]] = getelementptr i8, ptr [[A]], i64 23 -; CHECK-NEXT: [[Z_SROA_0_38_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 22 -; CHECK-NEXT: store i8 [[Z_SROA_0_38_VEC_EXTRACT]], ptr [[_34I_21]], align 1 -; CHECK-NEXT: [[_34I_23:%.*]] = getelementptr i8, ptr [[A]], i64 24 -; CHECK-NEXT: [[Z_SROA_0_39_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 23 -; CHECK-NEXT: store i8 [[Z_SROA_0_39_VEC_EXTRACT]], ptr [[_34I_22]], align 1 -; CHECK-NEXT: [[_34I_24:%.*]] = getelementptr i8, ptr [[A]], i64 25 -; CHECK-NEXT: [[Z_SROA_0_40_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 24 -; CHECK-NEXT: store i8 [[Z_SROA_0_40_VEC_EXTRACT]], ptr [[_34I_23]], align 1 -; CHECK-NEXT: [[_34I_25:%.*]] = getelementptr i8, ptr [[A]], i64 26 -; CHECK-NEXT: [[Z_SROA_0_41_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 25 -; CHECK-NEXT: store i8 [[Z_SROA_0_41_VEC_EXTRACT]], ptr [[_34I_24]], align 1 -; CHECK-NEXT: [[_34I_26:%.*]] = getelementptr i8, ptr [[A]], i64 27 -; CHECK-NEXT: [[Z_SROA_0_42_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 26 -; CHECK-NEXT: store i8 [[Z_SROA_0_42_VEC_EXTRACT]], ptr [[_34I_25]], align 1 -; CHECK-NEXT: [[_34I_27:%.*]] = getelementptr i8, ptr [[A]], i64 28 -; CHECK-NEXT: [[Z_SROA_0_43_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 27 -; CHECK-NEXT: store i8 
[[Z_SROA_0_43_VEC_EXTRACT]], ptr [[_34I_26]], align 1 -; CHECK-NEXT: [[_34I_28:%.*]] = getelementptr i8, ptr [[A]], i64 29 -; CHECK-NEXT: [[Z_SROA_0_44_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 28 -; CHECK-NEXT: store i8 [[Z_SROA_0_44_VEC_EXTRACT]], ptr [[_34I_27]], align 1 -; CHECK-NEXT: [[_34I_29:%.*]] = getelementptr i8, ptr [[A]], i64 30 -; CHECK-NEXT: [[Z_SROA_0_45_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 29 -; CHECK-NEXT: store i8 [[Z_SROA_0_45_VEC_EXTRACT]], ptr [[_34I_28]], align 1 -; CHECK-NEXT: [[_34I_30:%.*]] = getelementptr i8, ptr [[A]], i64 31 -; CHECK-NEXT: [[Z_SROA_0_46_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 30 -; CHECK-NEXT: store i8 [[Z_SROA_0_46_VEC_EXTRACT]], ptr [[_34I_29]], align 1 -; CHECK-NEXT: [[Z_SROA_0_47_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 31 -; CHECK-NEXT: store i8 [[Z_SROA_0_47_VEC_EXTRACT]], ptr [[_34I_30]], align 1 +; CHECK-NEXT: store <32 x i8> [[_0]], ptr [[A]], align 1 ; CHECK-NEXT: ret void ; start: From 089349f9eb47c57cfa2049ce0af5c7dd079a8581 Mon Sep 17 00:00:00 2001 From: DianQK Date: Sun, 17 Sep 2023 10:06:59 +0800 Subject: [PATCH 4/9] [EarlyCSE] Use ref --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 4c69a2f7d75c3..1bf614191c10b 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -595,13 +595,13 @@ template <> struct DenseMapInfo { return DenseMapInfo::getTombstoneKey(); } - static unsigned getHashValue(GEPValue Val); - static bool isEqual(GEPValue LHS, GEPValue RHS); + static unsigned getHashValue(const GEPValue &Val); + static bool isEqual(const GEPValue &LHS, const GEPValue &RHS); }; } // end namespace llvm -unsigned DenseMapInfo::getHashValue(GEPValue Val) { +unsigned DenseMapInfo::getHashValue(const GEPValue &Val) { GetElementPtrInst *GEP = 
cast<GetElementPtrInst>(Val.Inst); if (Val.HasConstantOffset) return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(), @@ -611,7 +611,7 @@ unsigned DenseMapInfo<GEPValue>::getHashValue(GEPValue Val) { hash_combine_range(GEP->value_op_begin(), GEP->value_op_end())); } -bool DenseMapInfo<GEPValue>::isEqual(GEPValue LHS, GEPValue RHS) { +bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) { if (LHS.isSentinel() || RHS.isSentinel()) return LHS.Inst == RHS.Inst; GetElementPtrInst *LGEP = cast<GetElementPtrInst>(LHS.Inst); From 12eda5cf2df6438453683ce8c917a64786686a69 Mon Sep 17 00:00:00 2001 From: DianQK Date: Sun, 17 Sep 2023 16:08:12 +0800 Subject: [PATCH 5/9] fixup! [EarlyCSE] Compare GEP instructions based on offset Add comments and some minor changes --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 45 ++++++++++++------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 1bf614191c10b..06e31f2a4666a 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -67,6 +67,7 @@ STATISTIC(NumCSE, "Number of instructions CSE'd"); STATISTIC(NumCSECVP, "Number of compare instructions CVP'd"); STATISTIC(NumCSELoad, "Number of load instructions CSE'd"); STATISTIC(NumCSECall, "Number of call instructions CSE'd"); +STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd"); STATISTIC(NumDSE, "Number of trivial dead stores removed"); DEBUG_COUNTER(CSECounter, "early-cse", @@ -1294,6 +1295,20 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst, return Result; } +static void combineIRFlags(Instruction &From, Value *To) { + if (auto *I = dyn_cast<Instruction>(To)) { + // If I being poison triggers UB, there is no need to drop those + // flags. Otherwise, only retain flags present on both I and From. + // TODO: Currently some fast-math flags are not treated as + // poison-generating even though they should. 
Until this is fixed, + // always retain flags present on both I and Inst for floating point + // instructions. + if (isa(I) || + (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))) + I->andIRFlags(&From); + } +} + bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier, const ParseMemoryInst &Later) { // Can we remove Earlier store because of Later store? @@ -1519,16 +1534,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n"); continue; } - if (auto *I = dyn_cast(V)) { - // If I being poison triggers UB, there is no need to drop those - // flags. Otherwise, only retain flags present on both I and Inst. - // TODO: Currently some fast-math flags are not treated as - // poison-generating even though they should. Until this is fixed, - // always retain flags present on both I and Inst for floating point - // instructions. - if (isa(I) || (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))) - I->andIRFlags(&Inst); - } + combineIRFlags(Inst, V); Inst.replaceAllUsesWith(V); salvageKnowledge(&Inst, &AC); removeMSSA(Inst); @@ -1641,35 +1647,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { continue; } + // Compare GEP instructions based on offset. if (GEPValue::canHandle(&Inst)) { GetElementPtrInst *GEP = cast(&Inst); APInt Offset(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0); bool HasConstantOffset = GEP->accumulateConstantOffset(SQ.DL, Offset); GEPValue GEPVal(GEP, Offset, HasConstantOffset); if (Value *V = AvailableGEPs.lookup(GEPVal)) { - LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V + LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V << '\n'); - if (auto *I = dyn_cast(V)) { - // If I being poison triggers UB, there is no need to drop those - // flags. Otherwise, only retain flags present on both I and Inst. - // TODO: Currently some fast-math flags are not treated as - // poison-generating even though they should. 
Until this is fixed, - // always retain flags present on both I and Inst for floating point - // instructions. - if (isa(I) || - (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))) - I->andIRFlags(&Inst); - } + combineIRFlags(Inst, V); Inst.replaceAllUsesWith(V); salvageKnowledge(&Inst, &AC); removeMSSA(Inst); Inst.eraseFromParent(); Changed = true; - ++NumCSE; + ++NumCSEGEP; continue; } - // Otherwise, just remember that this value is available. + // Otherwise, just remember that we have this GEP. AvailableGEPs.insert(GEPVal, &Inst); continue; } From b498e5cbaf70161c7d6a1a371780610f254cd160 Mon Sep 17 00:00:00 2001 From: DianQK Date: Tue, 19 Sep 2023 08:58:16 +0800 Subject: [PATCH 6/9] fixup! [EarlyCSE] Compare GEP instructions based on offset Use and and formatting --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 34 ++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 06e31f2a4666a..f7d02b0ccf2c6 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -548,7 +548,7 @@ bool DenseMapInfo::isEqual(CallValue LHS, CallValue RHS) { // currently executing, so conservatively return false if they are in // different basic blocks. 
if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent()) - return false; + return false; return LHSI->isIdenticalTo(RHSI); } @@ -565,21 +565,21 @@ struct GEPValue { bool HasConstantOffset; GEPValue(Instruction *I) : Inst(I), HasConstantOffset(false) { - assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); + assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } GEPValue(Instruction *I, APInt ConstantOffset, bool HasConstantOffset) : Inst(I), ConstantOffset(ConstantOffset), HasConstantOffset(HasConstantOffset) { - assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); + assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } bool isSentinel() const { - return Inst == DenseMapInfo::getEmptyKey() || - Inst == DenseMapInfo::getTombstoneKey(); + return Inst == DenseMapInfo::getEmptyKey() || + Inst == DenseMapInfo::getTombstoneKey(); } static bool canHandle(Instruction *Inst) { - return isa(Inst); + return isa(Inst); } }; @@ -589,11 +589,11 @@ namespace llvm { template <> struct DenseMapInfo { static inline GEPValue getEmptyKey() { - return DenseMapInfo::getEmptyKey(); + return DenseMapInfo::getEmptyKey(); } static inline GEPValue getTombstoneKey() { - return DenseMapInfo::getTombstoneKey(); + return DenseMapInfo::getTombstoneKey(); } static unsigned getHashValue(const GEPValue &Val); @@ -603,10 +603,10 @@ template <> struct DenseMapInfo { } // end namespace llvm unsigned DenseMapInfo::getHashValue(const GEPValue &Val) { - GetElementPtrInst *GEP = cast(Val.Inst); + auto *GEP = cast(Val.Inst); if (Val.HasConstantOffset) - return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(), - Val.ConstantOffset); + return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(), + Val.ConstantOffset); return hash_combine( GEP->getOpcode(), hash_combine_range(GEP->value_op_begin(), GEP->value_op_end())); @@ -614,13 +614,13 @@ unsigned DenseMapInfo::getHashValue(const GEPValue &Val) { bool 
DenseMapInfo::isEqual(const GEPValue &LHS, const GEPValue &RHS) { if (LHS.isSentinel() || RHS.isSentinel()) - return LHS.Inst == RHS.Inst; - GetElementPtrInst *LGEP = cast(LHS.Inst); - GetElementPtrInst *RGEP = cast(RHS.Inst); + return LHS.Inst == RHS.Inst; + auto *LGEP = cast(LHS.Inst); + auto *RGEP = cast(RHS.Inst); if (LGEP->getPointerOperand() != RGEP->getPointerOperand()) - return false; + return false; if (LHS.HasConstantOffset && RHS.HasConstantOffset) - return LHS.ConstantOffset == RHS.ConstantOffset; + return LHS.ConstantOffset == RHS.ConstantOffset; return LGEP->isIdenticalToWhenDefined(RGEP); } @@ -1649,7 +1649,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Compare GEP instructions based on offset. if (GEPValue::canHandle(&Inst)) { - GetElementPtrInst *GEP = cast(&Inst); + auto *GEP = cast(&Inst); APInt Offset(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0); bool HasConstantOffset = GEP->accumulateConstantOffset(SQ.DL, Offset); GEPValue GEPVal(GEP, Offset, HasConstantOffset); From 2803cd27502e68dc957aeeea3c95ebfdb23fde60 Mon Sep 17 00:00:00 2001 From: DianQK Date: Tue, 19 Sep 2023 15:33:43 +0800 Subject: [PATCH 7/9] [EarlyCSE] Use std::optional --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 27 +++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index f7d02b0ccf2c6..3048aaa997d8b 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -561,15 +561,14 @@ namespace { struct GEPValue { Instruction *Inst; - APInt ConstantOffset; - bool HasConstantOffset; + std::optional ConstantOffset; - GEPValue(Instruction *I) : Inst(I), HasConstantOffset(false) { + GEPValue(Instruction *I) : Inst(I) { assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } - GEPValue(Instruction *I, APInt ConstantOffset, bool HasConstantOffset) - : Inst(I), ConstantOffset(ConstantOffset), - 
HasConstantOffset(HasConstantOffset) { + + GEPValue(Instruction *I, std::optional ConstantOffset) + : Inst(I), ConstantOffset(ConstantOffset) { assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } @@ -604,9 +603,9 @@ template <> struct DenseMapInfo { unsigned DenseMapInfo::getHashValue(const GEPValue &Val) { auto *GEP = cast(Val.Inst); - if (Val.HasConstantOffset) + if (Val.ConstantOffset.has_value()) return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(), - Val.ConstantOffset); + Val.ConstantOffset.value()); return hash_combine( GEP->getOpcode(), hash_combine_range(GEP->value_op_begin(), GEP->value_op_end())); @@ -619,8 +618,8 @@ bool DenseMapInfo::isEqual(const GEPValue &LHS, const GEPValue &RHS) { auto *RGEP = cast(RHS.Inst); if (LGEP->getPointerOperand() != RGEP->getPointerOperand()) return false; - if (LHS.HasConstantOffset && RHS.HasConstantOffset) - return LHS.ConstantOffset == RHS.ConstantOffset; + if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value()) + return LHS.ConstantOffset.value() == RHS.ConstantOffset.value(); return LGEP->isIdenticalToWhenDefined(RGEP); } @@ -1650,9 +1649,11 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Compare GEP instructions based on offset. if (GEPValue::canHandle(&Inst)) { auto *GEP = cast(&Inst); - APInt Offset(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0); - bool HasConstantOffset = GEP->accumulateConstantOffset(SQ.DL, Offset); - GEPValue GEPVal(GEP, Offset, HasConstantOffset); + std::optional Offset = + APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0); + GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset.value()) + ? 
Offset + : std::nullopt); if (Value *V = AvailableGEPs.lookup(GEPVal)) { LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V << '\n'); From 82f1eca7c180b0dfe8af36e933670f938a49bab1 Mon Sep 17 00:00:00 2001 From: DianQK Date: Tue, 19 Sep 2023 14:12:59 +0800 Subject: [PATCH 8/9] [EarlyCSE] Replace APInt with uint64_t --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 3048aaa997d8b..316b87262c56f 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -561,13 +561,13 @@ namespace { struct GEPValue { Instruction *Inst; - std::optional ConstantOffset; + std::optional ConstantOffset; GEPValue(Instruction *I) : Inst(I) { assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } - GEPValue(Instruction *I, std::optional ConstantOffset) + GEPValue(Instruction *I, std::optional ConstantOffset) : Inst(I), ConstantOffset(ConstantOffset) { assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } @@ -1649,10 +1649,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Compare GEP instructions based on offset. if (GEPValue::canHandle(&Inst)) { auto *GEP = cast(&Inst); - std::optional Offset = - APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0); - GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset.value()) - ? Offset + APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0); + GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset) + ? 
Offset.tryZExtValue() : std::nullopt); if (Value *V = AvailableGEPs.lookup(GEPVal)) { LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V From b6158f93f34e592d535577442b80e22cb41a3a37 Mon Sep 17 00:00:00 2001 From: DianQK Date: Tue, 19 Sep 2023 21:53:28 +0800 Subject: [PATCH 9/9] [EarlyCSE] Replace uint64_t with int64_t --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 316b87262c56f..4990fa9f8b5ea 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -561,13 +561,13 @@ namespace { struct GEPValue { Instruction *Inst; - std::optional ConstantOffset; + std::optional ConstantOffset; GEPValue(Instruction *I) : Inst(I) { assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } - GEPValue(Instruction *I, std::optional ConstantOffset) + GEPValue(Instruction *I, std::optional ConstantOffset) : Inst(I), ConstantOffset(ConstantOffset) { assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } @@ -1651,7 +1651,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { auto *GEP = cast(&Inst); APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0); GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset) - ? Offset.tryZExtValue() + ? Offset.trySExtValue() : std::nullopt); if (Value *V = AvailableGEPs.lookup(GEPVal)) { LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V