From 8fb055932c085da21f3b721995a06f42006744bd Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 2 Sep 2020 08:09:24 -0400 Subject: [PATCH 001/465] [VectorCombine] allow vector loads with mismatched insert type This is an enhancement to D81766 to allow loading the minimum target vector type into an IR vector with a different number of elements. In one of the motivating tests from PR16739, SLP creates <2 x float> load ops mixed with <4 x float> insert ops, so we want to handle that pattern in addition to potential oversized vectors created by the vectorizers. For now, we are assuming the insert/extract subvector with undef is free because there is no exact corresponding TTI modeling for that. Differential Revision: https://reviews.llvm.org/D86160 --- .../Transforms/Vectorize/VectorCombine.cpp | 36 ++++++++++++------- .../test/Transforms/VectorCombine/X86/load.ll | 28 ++++++--------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 1cc0e40da3a2b..a954b9b293154 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -100,36 +100,36 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { Type *ScalarTy = Scalar->getType(); if (!Load || !Load->isSimple()) return false; + auto *Ty = dyn_cast(I.getType()); + if (!Ty) + return false; // TODO: Extend this to match GEP with constant offsets. 
Value *PtrOp = Load->getPointerOperand()->stripPointerCasts(); assert(isa(PtrOp->getType()) && "Expected a pointer type"); - unsigned VectorSize = TTI.getMinVectorRegisterBitWidth(); + unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth(); uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits(); - if (!ScalarSize || !VectorSize || VectorSize % ScalarSize != 0) + if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0) return false; // Check safety of replacing the scalar load with a larger vector load. - unsigned VecNumElts = VectorSize / ScalarSize; - auto *VectorTy = VectorType::get(ScalarTy, VecNumElts, false); - // TODO: Allow insert/extract subvector if the type does not match. - if (VectorTy != I.getType()) - return false; + unsigned MinVecNumElts = MinVectorSize / ScalarSize; + auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false); Align Alignment = Load->getAlign(); const DataLayout &DL = I.getModule()->getDataLayout(); - if (!isSafeToLoadUnconditionally(PtrOp, VectorTy, Alignment, DL, Load, &DT)) + if (!isSafeToLoadUnconditionally(PtrOp, MinVecTy, Alignment, DL, Load, &DT)) return false; unsigned AS = Load->getPointerAddressSpace(); // Original pattern: insertelt undef, load [free casts of] ScalarPtr, 0 int OldCost = TTI.getMemoryOpCost(Instruction::Load, ScalarTy, Alignment, AS); - APInt DemandedElts = APInt::getOneBitSet(VecNumElts, 0); - OldCost += TTI.getScalarizationOverhead(VectorTy, DemandedElts, true, false); + APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0); + OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts, true, false); // New pattern: load VecPtr - int NewCost = TTI.getMemoryOpCost(Instruction::Load, VectorTy, Alignment, AS); + int NewCost = TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS); // We can aggressively convert to the vector form because the backend can // invert this transform if it does not result in a performance win. 
@@ -139,8 +139,18 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // It is safe and potentially profitable to load a vector directly: // inselt undef, load Scalar, 0 --> load VecPtr IRBuilder<> Builder(Load); - Value *CastedPtr = Builder.CreateBitCast(PtrOp, VectorTy->getPointerTo(AS)); - LoadInst *VecLd = Builder.CreateAlignedLoad(VectorTy, CastedPtr, Alignment); + Value *CastedPtr = Builder.CreateBitCast(PtrOp, MinVecTy->getPointerTo(AS)); + Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment); + + // If the insert type does not match the target's minimum vector type, + // use an identity shuffle to shrink/grow the vector. + if (Ty != MinVecTy) { + unsigned OutputNumElts = Ty->getNumElements(); + SmallVector Mask(OutputNumElts, UndefMaskElem); + for (unsigned i = 0; i < OutputNumElts && i < MinVecNumElts; ++i) + Mask[i] = i; + VecLd = Builder.CreateShuffleVector(VecLd, UndefValue::get(MinVecTy), Mask); + } replaceValue(I, *VecLd); ++NumVecLoad; return true; diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index e24ffb8da66f2..f0c5b6ef7ad81 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -346,12 +346,11 @@ define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(1 ret <4 x float> %r } -; TODO: Should load v4i32. 
- define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_i32_insert_v8i32( -; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %s = load i32, i32* %p, align 4 @@ -359,13 +358,10 @@ define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) { ret <8 x i32> %r } -; TODO: Should load v4i32. - define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceable(16) %p) { ; CHECK-LABEL: @casted_load_i32_insert_v8i32( -; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i32>* [[P:%.*]] to i32* -; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 4 -; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %b = bitcast <4 x i32>* %p to i32* @@ -374,12 +370,11 @@ define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceabl ret <8 x i32> %r } -; TODO: Should load v4f32. 
- define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v16f32( -; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = insertelement <16 x float> undef, float [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -387,12 +382,11 @@ define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) ret <16 x float> %r } -; TODO: Should load v4f32. - define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v2f32( -; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[R]] ; %s = load float, float* %p, align 4 From 626c3738cdfa49527097fccdf89e22949138ade7 Mon Sep 17 00:00:00 2001 From: Venkataramanan Kumar Date: Wed, 2 Sep 2020 08:23:48 -0400 Subject: [PATCH 002/465] [InstCombine] Transform 1.0/sqrt(X) * X to X/sqrt(X) These transforms will now be performed irrespective of the number of uses for the expression "1.0/sqrt(X)": 1.0/sqrt(X) * X => X/sqrt(X) X * 1.0/sqrt(X) => X/sqrt(X) We already handle more general cases, and we are intentionally not creating extra (and likely expensive) fdiv ops in IR. This pattern is the exception to the rule because we always expect the Backend to reduce X/sqrt(X) to sqrt(X), if it has the necessary (reassoc) fast-math-flags. 
Ref: DagCombiner optimizes the X/sqrt(X) to sqrt(X). Differential Revision: https://reviews.llvm.org/D86726 --- .../InstCombine/InstCombineMulDivRem.cpp | 15 +++++++++++++++ llvm/test/Transforms/InstCombine/fmul-sqrt.ll | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 26db91cc51129..99f19d9663b7b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -544,6 +544,21 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { return replaceInstUsesWith(I, Sqrt); } + // The following transforms are done irrespective of the number of uses + // for the expression "1.0/sqrt(X)". + // 1) 1.0/sqrt(X) * X -> X/sqrt(X) + // 2) X * 1.0/sqrt(X) -> X/sqrt(X) + // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it + // has the necessary (reassoc) fast-math-flags. + if (I.hasNoSignedZeros() && + match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && + match(Y, m_Intrinsic(m_Value(X))) && Op1 == X) + return BinaryOperator::CreateFDivFMF(X, Y, &I); + if (I.hasNoSignedZeros() && + match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) && + match(Y, m_Intrinsic(m_Value(X))) && Op0 == X) + return BinaryOperator::CreateFDivFMF(X, Y, &I); + // Like the similar transform in instsimplify, this requires 'nsz' because // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0. 
if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 && diff --git a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll index de030bb59c568..e77a828729e1d 100644 --- a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll +++ b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll @@ -103,7 +103,7 @@ define double @rsqrt_x_reassociate_extra_use(double %x, double * %p) { ; CHECK-LABEL: @rsqrt_x_reassociate_extra_use( ; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) ; CHECK-NEXT: [[RSQRT:%.*]] = fdiv double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: [[RES:%.*]] = fmul reassoc nsz double [[RSQRT]], [[X]] +; CHECK-NEXT: [[RES:%.*]] = fdiv reassoc nsz double [[X:%.*]], [[SQRT]] ; CHECK-NEXT: store double [[RSQRT]], double* [[P:%.*]], align 8 ; CHECK-NEXT: ret double [[RES]] ; @@ -119,7 +119,7 @@ define <2 x float> @x_add_y_rsqrt_reassociate_extra_use(<2 x float> %x, <2 x flo ; CHECK-NEXT: [[ADD:%.*]] = fadd fast <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ADD]]) ; CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x float> , [[SQRT]] -; CHECK-NEXT: [[RES:%.*]] = fmul fast <2 x float> [[ADD]], [[RSQRT]] +; CHECK-NEXT: [[RES:%.*]] = fdiv fast <2 x float> [[ADD]], [[SQRT]] ; CHECK-NEXT: store <2 x float> [[RSQRT]], <2 x float>* [[P:%.*]], align 8 ; CHECK-NEXT: ret <2 x float> [[RES]] ; From f5ed22f09dd95c879b57a11c42d2fa7f5ef5e72d Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Wed, 26 Aug 2020 16:41:04 +0000 Subject: [PATCH 003/465] [mlir][VectorToSCF] 128 byte alignment of alloc ops Added 128 byte alignment to alloc ops created in VectorToSCF pass. 128b alignment was already introduced to this pass but not to all alloc ops. This commit changes that by adding 128b alignment to the remaining ops. The point of specifying alignment is to prevent possible memory alignment errors on weakly tested architectures. 
Differential Revision: https://reviews.llvm.org/D86454 --- mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp | 12 ++++++++---- .../test/Conversion/VectorToSCF/vector-to-loops.mlir | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 0f428f887d124..267aea90cc9d7 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -35,6 +35,8 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/Passes.h" +#define ALIGNMENT_SIZE 128 + using namespace mlir; using namespace mlir::edsc; using namespace mlir::edsc::intrinsics; @@ -232,8 +234,8 @@ static Value setAllocAtFunctionEntry(MemRefType memRefMinorVectorType, op->getParentWithTrait(); assert(scope && "Expected op to be inside automatic allocation scope"); b.setInsertionPointToStart(&scope->getRegion(0).front()); - Value res = - std_alloca(memRefMinorVectorType, ValueRange{}, b.getI64IntegerAttr(128)); + Value res = std_alloca(memRefMinorVectorType, ValueRange{}, + b.getI64IntegerAttr(ALIGNMENT_SIZE)); return res; } @@ -575,7 +577,8 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( steps.push_back(std_constant_index(step)); // 2. Emit alloc-copy-load-dealloc. - Value tmp = std_alloc(tmpMemRefType(transfer)); + Value tmp = std_alloc(tmpMemRefType(transfer), ValueRange{}, + rewriter.getI64IntegerAttr(ALIGNMENT_SIZE)); StdIndexedValue local(tmp); Value vec = vector_type_cast(tmp); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { @@ -648,7 +651,8 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( steps.push_back(std_constant_index(step)); // 2. Emit alloc-store-copy-dealloc. 
- Value tmp = std_alloc(tmpMemRefType(transfer)); + Value tmp = std_alloc(tmpMemRefType(transfer), ValueRange{}, + rewriter.getI64IntegerAttr(ALIGNMENT_SIZE)); StdIndexedValue local(tmp); Value vec = vector_type_cast(tmp); std_store(vectorValue, vec); diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir index 5c2da799d861e..b19ea9dde7933 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -68,7 +68,7 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 { - // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32> + // CHECK: %[[ALLOC:.*]] = alloc() {alignment = 128 : i64} : memref<5x4x3xf32> // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { @@ -145,7 +145,7 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 { // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 { - // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32> + // CHECK: %[[ALLOC:.*]] = alloc() {alignment = 128 : i64} : memref<5x4x3xf32> // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4x3xf32> // CHECK: store %{{.*}}, {{.*}} : memref> // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { From 6d36b22b219f663b9b8317147ea8f7a9cb4e18dc Mon Sep 17 00:00:00 2001 From: David Stenberg Date: Wed, 2 Sep 2020 08:46:53 +0200 Subject: [PATCH 004/465] [GlobalOpt] Fix an incorrect Modified status When marking a global variable constant, and simplifying users 
using CleanupConstantGlobalUsers(), the pass could incorrectly return false if there were still some uses left, and no further optimizations was done. This was caught using the check introduced by D80916. This fixes PR46749. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D85837 --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 12 +++++--- .../GlobalOpt/const-return-status-atomic.ll | 27 ++++++++++++++++++ .../GlobalOpt/const-return-status.ll | 28 +++++++++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/GlobalOpt/const-return-status-atomic.ll create mode 100644 llvm/test/Transforms/GlobalOpt/const-return-status.ll diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 05d1465b3663b..f3053398cd5ae 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1990,12 +1990,13 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, return true; } + bool Changed = false; + // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.IsLoaded) { LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); - bool Changed; if (isLeakCheckerRoot(GV)) { // Delete any constant stores to the global. Changed = CleanupPointerRootUsers(GV, GetTLI); @@ -2021,11 +2022,14 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, // Don't actually mark a global constant if it's atomic because atomic loads // are implemented by a trivial cmpxchg in some edge-cases and that usually // requires write access to the variable even if it's not actually changed. - if (GS.Ordering == AtomicOrdering::NotAtomic) + if (GS.Ordering == AtomicOrdering::NotAtomic) { + assert(!GV->isConstant() && "Expected a non-constant global"); GV->setConstant(true); + Changed = true; + } // Clean up any obviously simplifiable users now. 
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); + Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); // If the global is dead now, just nuke it. if (GV->use_empty()) { @@ -2085,7 +2089,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, } } - return false; + return Changed; } /// Analyze the specified global variable and optimize it if possible. If we diff --git a/llvm/test/Transforms/GlobalOpt/const-return-status-atomic.ll b/llvm/test/Transforms/GlobalOpt/const-return-status-atomic.ll new file mode 100644 index 0000000000000..f52ba05e6c19b --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/const-return-status-atomic.ll @@ -0,0 +1,27 @@ +; RUN: opt -globalopt < %s -S -o - | FileCheck %s + +; When simplifying users of a global variable, the pass could incorrectly +; return false if there were still some uses left, and no further optimizations +; was done. This was caught by the pass return status check that is hidden +; under EXPENSIVE_CHECKS. 
+ +@GV1 = internal unnamed_addr global i64 1, align 8 + +; CHECK: @GV1 = internal unnamed_addr global i64 1, align 8 + +define void @test1() local_unnamed_addr { +; CHECK-LABEL: @test1 +; CHECK-NEXT: %val = load atomic i8 +; CHECK-NEXT: ret void + + %val = load atomic i8, i8* bitcast (i64* @GV1 to i8*) acquire, align 8 + ret void +} + +define i64 @test2() local_unnamed_addr { +; CHECK-LABEL: @test2 +; CHECK-NEXT: ret i64 1 + + %val = load atomic i64, i64* @GV1 acquire, align 8 + ret i64 %val +} diff --git a/llvm/test/Transforms/GlobalOpt/const-return-status.ll b/llvm/test/Transforms/GlobalOpt/const-return-status.ll new file mode 100644 index 0000000000000..32c4eb895dc1a --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/const-return-status.ll @@ -0,0 +1,28 @@ +; RUN: opt -globalopt < %s -S -o - | FileCheck %s + +; When simplifying users of a global variable, the pass could incorrectly +; return false if there were still some uses left, and no further optimizations +; was done. This was caught by the pass return status check that is hidden +; under EXPENSIVE_CHECKS. 
+ +; CHECK: @src = internal unnamed_addr constant + +; CHECK: entry: +; CHECK-NEXT: %call = call i32 @f(i32 0) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (i32* @dst to i8*), i8* align 4 bitcast ([1 x i32]* @src to i8*), i64 1, i1 false) +; CHECK-NEXT: ret void + +@src = internal unnamed_addr global [1 x i32] zeroinitializer, align 4 +@dst = external dso_local local_unnamed_addr global i32, align 4 + +define dso_local void @d() local_unnamed_addr { +entry: + %0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @src, i64 0, i64 0), align 4 + %call = call i32 @f(i32 %0) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (i32* @dst to i8*), i8* align 4 bitcast ([1 x i32]* @src to i8*), i64 1, i1 false) + ret void +} + +declare dso_local i32 @f(i32) local_unnamed_addr + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) From 1b88bbf5eb80b38a4dee129df969d5632993fdd1 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Wed, 2 Sep 2020 09:24:36 -0400 Subject: [PATCH 005/465] Revert "[mlir] Extend BufferAssignmentTypeConverter with result conversion callbacks" This reverts commit 94f5d248772ba0f1f9c8b0746fe75a5d246c5540 because of failing the following tests: MLIR :: Dialect/Linalg/tensors-to-buffers.mlir MLIR :: Transforms/buffer-placement-preparation-allowed-memref-results.mlir MLIR :: Transforms/buffer-placement-preparation.mlir --- .../include/mlir/Transforms/BufferPlacement.h | 344 +++++++++--------- .../Linalg/Transforms/TensorsToBuffers.cpp | 11 +- mlir/lib/Transforms/BufferPlacement.cpp | 220 +---------- ...nt-preparation-allowed-memref-results.mlir | 66 ---- .../buffer-placement-preparation.mlir | 85 ----- mlir/test/lib/Dialect/Test/TestOps.td | 29 +- .../lib/Transforms/TestBufferPlacement.cpp | 48 +-- 7 files changed, 191 insertions(+), 612 deletions(-) diff --git a/mlir/include/mlir/Transforms/BufferPlacement.h 
b/mlir/include/mlir/Transforms/BufferPlacement.h index 8fc254e6be1e2..f8559a9dd9396 100644 --- a/mlir/include/mlir/Transforms/BufferPlacement.h +++ b/mlir/include/mlir/Transforms/BufferPlacement.h @@ -52,111 +52,6 @@ class BufferAssignmentPlacer { Operation *operation; }; -/// A helper type converter class for using inside Buffer Assignment operation -/// conversion patterns. The default constructor keeps all the types intact -/// except for the ranked-tensor types which is converted to memref types. -class BufferAssignmentTypeConverter : public TypeConverter { -public: - /// This enum is for showing how buffer placement operation converters should - /// conduct with certain result type after type conversion. This value can be - /// set/get for each specific type using setResultConversionKind or - /// getResultConversionKind. - enum ResultConversionKind { AppendToArgumentsList, KeepAsFunctionResult }; - - BufferAssignmentTypeConverter(); - - /// This method tries to decompose a value of a certain type using provided - /// decompose callback functions. If it is unable to do so, the original value - /// is returned. - void tryDecomposeValue(OpBuilder &, Location, Type, Value, - SmallVectorImpl &); - - /// This method tries to decompose a type using provided decompose callback - /// functions. If it is unable to do so, the original type is returned. - void tryDecomposeType(Type, SmallVectorImpl &); - - /// This method registers a callback function that will be called to decompose - /// a value of a certain type into several values. - template ::template arg_t<2>> - void addDecomposeValueConversion(FnT &&callback) { - decomposeValueConversions.emplace_back( - wrapDecomposeValueConversionCallback(std::forward(callback))); - } - - /// This method registers a callback function that will be called to decompose - /// a type into several types. 
- template ::template arg_t<0>> - void addDecomposeTypeConversion(FnT &&callback) { - auto wrapper = - wrapDecomposeTypeConversionCallback(std::forward(callback)); - decomposeTypeConversions.emplace_back(wrapper); - addConversion(std::forward(callback)); - } - - /// This method returns ResultConversionKind for the mapping from `origin` - /// type to `input` type. - ResultConversionKind getResultConversionKind(Type origin, Type input); - - /// This method registers ResultConversionKind for the mapping from type 'T' - /// to type 'U'. - template - void setResultConversionKind(ResultConversionKind kind) { - assert((kind != AppendToArgumentsList || - llvm::is_one_of::value) && - "Only the memref typed values can be set to be appended to the " - "function argument list at the moment"); - resultTypeConversions.emplace_back( - [&](Type origin, Type input) -> Optional { - if (origin.template isa() && input.template isa()) - return kind; - return llvm::None; - }); - } - -private: - using DecomposeValueConversionCallFn = std::function( - OpBuilder &, Location, Type, Value, SmallVectorImpl &)>; - - using DecomposeTypeConversionCallFn = - std::function(Type, SmallVectorImpl &)>; - - using ResultConversionKindFn = - std::function(Type, Type)>; - - /// Generate a wrapper for the given decompose value conversion callback. - template - DecomposeValueConversionCallFn - wrapDecomposeValueConversionCallback(FnT &&callback) { - return [callback = std::forward(callback)]( - OpBuilder &builder, Location loc, Type type, Value value, - SmallVectorImpl &newValues) -> Optional { - if (T derivedType = type.dyn_cast()) - return callback(builder, loc, derivedType, value, newValues); - return llvm::None; - }; - } - - /// Generate a wrapper for the given decompose type conversion callback. 
- template - DecomposeTypeConversionCallFn - wrapDecomposeTypeConversionCallback(FnT &&callback) { - return [callback = std::forward(callback)]( - Type type, - SmallVectorImpl &results) -> Optional { - T derivedType = type.dyn_cast(); - if (!derivedType) - return llvm::None; - return callback(derivedType, results); - }; - } - - SmallVector resultTypeConversions; - SmallVector decomposeValueConversions; - SmallVector decomposeTypeConversions; -}; - /// Helper conversion pattern that encapsulates a BufferAssignmentPlacer /// instance. Sample usage: /// class CustomConversionPattern : public @@ -173,22 +68,43 @@ class BufferAssignmentOpConversionPattern public: explicit BufferAssignmentOpConversionPattern( MLIRContext *context, BufferAssignmentPlacer *bufferAssignment = nullptr, - BufferAssignmentTypeConverter *converter = nullptr, - PatternBenefit benefit = 1) + TypeConverter *converter = nullptr, PatternBenefit benefit = 1) : OpConversionPattern(context, benefit), - bufferAssignment(bufferAssignment), converter(converter) { - assert(converter && "The type converter has not been defined"); - } + bufferAssignment(bufferAssignment), converter(converter) {} protected: BufferAssignmentPlacer *bufferAssignment; - BufferAssignmentTypeConverter *converter; + TypeConverter *converter; +}; + +/// A helper type converter class for using inside Buffer Assignment operation +/// conversion patterns. The default constructor keeps all the types intact +/// except for the ranked-tensor types which is converted to memref types. +class BufferAssignmentTypeConverter : public TypeConverter { +public: + BufferAssignmentTypeConverter(); + + /// A helper function to check if `type` has been converted from non-memref + /// type to memref. + static bool isConvertedMemref(Type type, Type before); }; -/// Converts the signature of the function using BufferAssignmentTypeConverter. 
-/// Each result type of the function is kept as a function result or appended to -/// the function arguments list based on ResultConversionKind for the converted -/// result type. +namespace detail { + +/// Converts the signature of the function based on whether the function is +/// allowed to return memref typed results or not using +/// `allowMemrefFunctionResults` parameter. If this option is false, then it +/// adds an extra function argument as an output buffer for each function result +/// which is going to be a memref type only after type conversion. The +/// other function result types remain unchanged. If +/// `allowMemrefFunctionResults` is true, the types are converted in place. +/// Any changes in function signature need to be applied +/// to return and caller operations. `BufferAssignmentReturnOpConverter` and +/// `BufferAssignmentCallOpConverter` are two helper function that match the +/// return and caller operation with the new function signature. Furthermore, +/// `BufferAssignmentTypeConverter` is a helper `TypeConverter` for converting +/// tensor typed values to memref typed ones. +template class BufferAssignmentFuncOpConverter : public BufferAssignmentOpConversionPattern { public: @@ -196,16 +112,58 @@ class BufferAssignmentFuncOpConverter FuncOp>::BufferAssignmentOpConversionPattern; /// Performs the actual signature rewriting step. - LogicalResult matchAndRewrite(mlir::FuncOp, ArrayRef, - ConversionPatternRewriter &) const; + LogicalResult + matchAndRewrite(mlir::FuncOp funcOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + if (!converter) + return funcOp.emitError("The type converter has not been defined for " + "BufferAssignmentFuncOpConverter"); + auto funcType = funcOp.getType(); + + // Convert function arguments using the provided TypeConverter. 
+ TypeConverter::SignatureConversion conversion(funcType.getNumInputs()); + for (auto argType : llvm::enumerate(funcType.getInputs())) + conversion.addInputs(argType.index(), + converter->convertType(argType.value())); + + // If allowMemrefFunctionResults is false and a function result type is not + // a memref but it would be a memref after type conversion, a new argument + // should be appended to the function arguments list for this result. + // Otherwise, it remains unchanged as a function result. + SmallVector newResultTypes; + newResultTypes.reserve(funcOp.getNumResults()); + for (Type resType : funcType.getResults()) { + Type convertedType = converter->convertType(resType); + if (!allowMemrefFunctionResults && + BufferAssignmentTypeConverter::isConvertedMemref(convertedType, + resType)) + conversion.addInputs(convertedType); + else + newResultTypes.push_back(convertedType); + } + if (failed(rewriter.convertRegionTypes(&funcOp.getBody(), *converter, + &conversion))) + return failure(); + + // Update the signature of the function. + rewriter.updateRootInPlace(funcOp, [&] { + funcOp.setType(rewriter.getFunctionType(conversion.getConvertedTypes(), + newResultTypes)); + }); + return success(); + } }; /// Rewrites the `ReturnOp` to conform with the changed function signature. -/// Operands that correspond to return values and their types have been set to -/// AppendToArgumentsList are dropped. In their place, a corresponding copy -/// operation from the operand to the target function argument is inserted. +/// if allowMemrefFunctionResults is false, operands that correspond to return +/// values and have been rewritten from illegal typed results to memref +/// arguments are dropped. In their place, a corresponding copy operation from +/// the operand to the output function argument is inserted. Otherwise, the +/// memref typed operands are returned. 
+/// Note: If this pattern rewriter is used with BufferAssignmentFuncOpConverter, +/// allowMemrefFunctionResults must be set/unset for both. template + typename CopyOpTy, bool allowMemrefFunctionResults> class BufferAssignmentReturnOpConverter : public BufferAssignmentOpConversionPattern { public: @@ -216,48 +174,44 @@ class BufferAssignmentReturnOpConverter LogicalResult matchAndRewrite(ReturnOpSourceTy returnOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { - Location loc = returnOp.getLoc(); - - // Split the operands depending on whether they need a copy operation or - // they remain as operands of the return operation. If an operand is - // decomposable and a decompose callback function has been provided by the - // user, it will be unpacked. - SmallVector newOperands, needCopyOperands; - OpBuilder builder(returnOp); - for (auto operand : llvm::enumerate(operands)) { - SmallVector values; - this->converter->tryDecomposeValue( - builder, loc, operand.value().getType(), operand.value(), values); - Type type = returnOp.getOperand(operand.index()).getType(); - SmallVector originTypes; - this->converter->tryDecomposeType(type, originTypes); - for (auto value : llvm::enumerate(values)) { - Type origin = originTypes[value.index()]; - Type converted = value.value().getType(); - auto kind = this->converter->getResultConversionKind(origin, converted); - if (kind == BufferAssignmentTypeConverter::KeepAsFunctionResult) - newOperands.push_back(value.value()); - else - // kind = BufferAssignmentTypeConverter::AppendToArgumentsList - needCopyOperands.push_back(value.value()); - } + // If the memref typed results can be returned as function results, the new + // `ReturnOp` should only return the type converted operands. 
+ if (allowMemrefFunctionResults) { + rewriter.replaceOpWithNewOp(returnOp, operands); + return success(); } - // Insert Copy operations instead for the operands that have been removed - // from operand list and appended to the function arguments list. + // Split the operands by their kinds whether they are converted memref or + // not. + SmallVector needCopyOperands, newOperands; + unsigned operandsSize = operands.size(); + needCopyOperands.reserve(operandsSize); + newOperands.reserve(operandsSize); + for (auto operand : llvm::enumerate(operands)) + if (BufferAssignmentTypeConverter::isConvertedMemref( + operand.value().getType(), + returnOp.getOperand(operand.index()).getType())) + needCopyOperands.push_back(operand.value()); + else + newOperands.push_back(operand.value()); + Block &entryBlock = returnOp.getParentRegion()->front(); unsigned numFuncArgs = entryBlock.getNumArguments(); - if (needCopyOperands.size() > numFuncArgs) - return returnOp.emitError( - "The number of operands that need Copy operations is more " - "than the number of target function arguments."); + + // Find the index of the first destination buffer. + assert(needCopyOperands.size() <= numFuncArgs && + "The number of operands of return operation is more than the " + "number of function arguments."); unsigned destArgNum = numFuncArgs - needCopyOperands.size(); rewriter.setInsertionPoint(returnOp); for (Value operand : needCopyOperands) { - rewriter.create(loc, operand, + // Insert a `CopyOp` for each converted memref-type operand. + rewriter.create(returnOp.getLoc(), operand, entryBlock.getArgument(destArgNum)); ++destArgNum; } + + // Insert the new target Return operation. rewriter.replaceOpWithNewOp(returnOp, newOperands); return success(); } @@ -265,32 +219,94 @@ class BufferAssignmentReturnOpConverter /// Rewrites the `CallOp` to match its operands and results with the signature /// of the callee after rewriting the callee with -/// BufferAssignmentFuncOpConverter. 
+/// BufferAssignmentFuncOpConverter. If allowMemrefFunctionResults is false, a +/// buffer is allocated as an output buffer only for each memref typed result +/// that has been rewritten. The new allocated buffer is passed through the +/// operands list of the new `CallOp`. +/// Note: If this pattern rewriter is used with BufferAssignmentFuncOpConverter, +/// allowMemrefFunctionResults must be set/unset for both. +template class BufferAssignmentCallOpConverter : public BufferAssignmentOpConversionPattern { public: using BufferAssignmentOpConversionPattern< CallOp>::BufferAssignmentOpConversionPattern; - /// Performs the actual rewriting step. - LogicalResult matchAndRewrite(CallOp, ArrayRef, - ConversionPatternRewriter &) const; + LogicalResult + matchAndRewrite(CallOp callOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + if (!converter) + return callOp.emitError("The type converter has not been defined for " + "BufferAssignmentCallOpConverter"); + Location loc = callOp.getLoc(); + + // If the memref typed results can be returned as function results, there is + // no need to create output buffers. It is only required to convert the type + // of operands and results in place for creating the new `CallOp`. 
+ if (allowMemrefFunctionResults) { + SmallVector resultTypes; + resultTypes.reserve(callOp.getNumResults()); + for (Type type : callOp.getResultTypes()) + resultTypes.push_back(converter->convertType(type)); + rewriter.replaceOpWithNewOp(callOp, callOp.getCallee(), + resultTypes, operands); + return success(); + } + + SmallVector newOperands, replacingValues; + SmallVector newResultTypes; + unsigned numResults = callOp.getNumResults(); + newOperands.reserve(numResults + operands.size()); + newOperands.append(operands.begin(), operands.end()); + newResultTypes.reserve(numResults); + replacingValues.reserve(numResults); + + // For each memref result of `CallOp` which has not been a memref before + // the type conversion, a new buffer is allocated and passed to the operands + // list of the new `CallOp`. Otherwise, it remains as a caller result. + for (Value result : callOp.getResults()) { + Type currType = result.getType(); + Type newType = converter->convertType(result.getType()); + if (BufferAssignmentTypeConverter::isConvertedMemref(newType, currType)) { + OpBuilder::InsertionGuard guard(rewriter); + rewriter.restoreInsertionPoint(bufferAssignment->computeAllocPosition( + result.dyn_cast())); + Value alloc = + rewriter.create(loc, newType.dyn_cast()); + newOperands.push_back(alloc); + replacingValues.push_back(alloc); + } else { + newResultTypes.push_back(currType); + + // No replacing is required. + replacingValues.push_back(nullptr); + } + } + + // Creating the new `CallOp`. + rewriter.create(loc, callOp.getCallee(), newResultTypes, + newOperands); + + // Replacing the results of the old `CallOp`. + rewriter.replaceOp(callOp, replacingValues); + return success(); + } }; +} // end namespace detail /// Populates `patterns` with the conversion patterns of buffer /// assignment. 
template + typename CopyOpTy, bool allowMemrefFunctionResults> static void populateWithBufferAssignmentOpConversionPatterns( MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, - OwningRewritePatternList *patterns) { + TypeConverter *converter, OwningRewritePatternList *patterns) { // clang-format off patterns->insert< - BufferAssignmentCallOpConverter, - BufferAssignmentFuncOpConverter, - BufferAssignmentReturnOpConverter - + detail::BufferAssignmentCallOpConverter, + detail::BufferAssignmentFuncOpConverter, + detail::BufferAssignmentReturnOpConverter + >(context, placer, converter); // clang-format on } diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp index 89a01f9ca6292..04c1fbd5d5655 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp @@ -100,11 +100,11 @@ class GenericOpConverter /// tensors to buffers. static void populateConvertLinalgOnTensorsToBuffersPattern( MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, - OwningRewritePatternList *patterns) { + TypeConverter *converter, OwningRewritePatternList *patterns) { populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, placer, - converter, patterns); + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp, + /*allowMemrefFunctionResults=*/false>(context, placer, converter, + patterns); patterns->insert(context, placer, converter); } @@ -141,9 +141,6 @@ struct ConvertLinalgOnTensorsToBuffers converter.isLegal(&funcOp.getBody()); }); - converter.setResultConversionKind( - BufferAssignmentTypeConverter::AppendToArgumentsList); - // Walk over all the functions to apply buffer assignment. 
getOperation().walk([&](FuncOp function) -> WalkResult { OwningRewritePatternList patterns; diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp index 1ab3e7e2e48dc..201570a244ffc 100644 --- a/mlir/lib/Transforms/BufferPlacement.cpp +++ b/mlir/lib/Transforms/BufferPlacement.cpp @@ -713,223 +713,9 @@ BufferAssignmentTypeConverter::BufferAssignmentTypeConverter() { }); } -/// This method tries to decompose a value of a certain type using provided -/// decompose callback functions. If it is unable to do so, the original value -/// is returned. -void BufferAssignmentTypeConverter::tryDecomposeValue( - OpBuilder &builder, Location loc, Type type, Value value, - SmallVectorImpl &results) { - for (auto conversion : decomposeValueConversions) - if (conversion(builder, loc, type, value, results) != llvm::None) - return; - results.push_back(value); -} - -/// This method tries to decompose a type using provided decompose callback -/// functions. If it is unable to do so, the original type is returned. -void BufferAssignmentTypeConverter::tryDecomposeType( - Type type, SmallVectorImpl &types) { - for (auto conversion : decomposeTypeConversions) - if (conversion(type, types) != llvm::None) - return; - types.push_back(type); -} - -/// This method returns ResultConversionKind for the input type. -BufferAssignmentTypeConverter::ResultConversionKind -BufferAssignmentTypeConverter::getResultConversionKind(Type origin, - Type converted) { - for (auto conversion : resultTypeConversions) { - auto res = conversion(origin, converted); - if (res != llvm::None) - return res.getValue(); - } - return KeepAsFunctionResult; -} - -//===----------------------------------------------------------------------===// -// BufferAssignmentFuncOpConverter -//===----------------------------------------------------------------------===// - -/// Performs the actual function signature rewriting step. 
-LogicalResult BufferAssignmentFuncOpConverter::matchAndRewrite( - mlir::FuncOp funcOp, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - auto funcType = funcOp.getType(); - - // Convert function arguments using the provided TypeConverter. - TypeConverter::SignatureConversion conversion(funcType.getNumInputs()); - for (auto argType : llvm::enumerate(funcType.getInputs())) { - SmallVector decomposedTypes, convertedTypes; - converter->tryDecomposeType(argType.value(), decomposedTypes); - converter->convertTypes(decomposedTypes, convertedTypes); - conversion.addInputs(argType.index(), convertedTypes); - } - - // Convert the result types of the function. - SmallVector newResultTypes; - newResultTypes.reserve(funcOp.getNumResults()); - for (Type resultType : funcType.getResults()) { - SmallVector originTypes; - converter->tryDecomposeType(resultType, originTypes); - for (auto origin : originTypes) { - Type converted = converter->convertType(origin); - auto kind = converter->getResultConversionKind(origin, converted); - if (kind == BufferAssignmentTypeConverter::AppendToArgumentsList) - conversion.addInputs(converted); - else - // kind = BufferAssignmentTypeConverter::KeepAsFunctionResult - newResultTypes.push_back(converted); - } - } - - if (failed(rewriter.convertRegionTypes(&funcOp.getBody(), *converter, - &conversion))) - return failure(); - - // Update the signature of the function. - rewriter.updateRootInPlace(funcOp, [&] { - funcOp.setType(rewriter.getFunctionType(conversion.getConvertedTypes(), - newResultTypes)); - }); - return success(); -} - -//===----------------------------------------------------------------------===// -// BufferAssignmentCallOpConverter -//===----------------------------------------------------------------------===// - -/// Performs the actual rewriting step. 
-LogicalResult BufferAssignmentCallOpConverter::matchAndRewrite( - CallOp callOp, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - - // This class represents a mapping from a result to a list of values and some - // results that have not yet constructed. Instead, the indices of these - // results in the operation that will be constructed are known. They will be - // replaced with the actual values when they are available. The order of - // adding to this mapping is important. - class ResultMapping { - public: - ResultMapping() { order = 0; }; - - /// Add an available value to the mapping. - void addMapping(Value value) { - toValuesMapping.push_back({order++, value}); - } - - /// Add the index of unavailble result value to the mapping. - void addMapping(unsigned index) { - toIndicesMapping.push_back({order++, index}); - } - - /// This method returns the mapping values list. The unknown result values - /// that only their indicies are available are replaced with their values. - void getMappingValues(ValueRange valuesToReplaceIndices, - SmallVectorImpl &values) { - // Append available values to the list. - SmallVector, 2> res(toValuesMapping.begin(), - toValuesMapping.end()); - // Replace the indices with the actual values. - llvm::for_each( - toIndicesMapping, [&](const std::pair &entry) { - assert(entry.second < valuesToReplaceIndices.size() && - "The value index is out of range."); - res.push_back({entry.first, valuesToReplaceIndices[entry.second]}); - }); - // Sort the values based on their adding orders. - llvm::sort(res, [](const std::pair &v1, - const std::pair &v2) { - return v1.first < v2.first; - }); - // Fill the values. - llvm::for_each(res, [&](const std::pair &entry) { - values.push_back(entry.second); - }); - } - - private: - /// Keeping the inserting order of mapping values. - int order; - - /// Containing the mapping values with their inserting orders. 
- SmallVector, 2> toValuesMapping; - - /// Containing the indices of result values with their inserting orders. - SmallVector, 2> toIndicesMapping; - }; - - Location loc = callOp.getLoc(); - OpBuilder builder(callOp); - SmallVector newOperands; - - // Create the operands list of the new `CallOp`. It unpacks the decomposable - // values if a decompose callback function has been provided by the user. - for (auto operand : operands) { - SmallVector values; - this->converter->tryDecomposeValue(builder, loc, operand.getType(), operand, - values); - newOperands.append(values.begin(), values.end()); - } - - // Create the new result types for the new `CallOp` and a mapping from the old - // result to new value(s). - SmallVector newResultTypes; - SmallVector mappings; - mappings.resize(callOp.getNumResults()); - for (auto result : llvm::enumerate(callOp.getResults())) { - SmallVector originTypes; - converter->tryDecomposeType(result.value().getType(), originTypes); - auto &resultMapping = mappings[result.index()]; - for (Type origin : originTypes) { - Type converted = converter->convertType(origin); - auto kind = converter->getResultConversionKind(origin, converted); - if (kind == BufferAssignmentTypeConverter::KeepAsFunctionResult) { - newResultTypes.push_back(converted); - // The result value is not yet available. Its index is kept and it is - // replaced with the actual value of the new `CallOp` later. 
- resultMapping.addMapping(newResultTypes.size() - 1); - } else { - // kind = BufferAssignmentTypeConverter::AppendToArgumentsList - OpBuilder::InsertionGuard guard(rewriter); - rewriter.restoreInsertionPoint( - bufferAssignment->computeAllocPosition(result.value())); - MemRefType memref = converted.dyn_cast(); - if (!memref) - return callOp.emitError("Cannot allocate for a non-Memref type"); - Value alloc = rewriter.create(loc, memref); - newOperands.push_back(alloc); - resultMapping.addMapping(alloc); - } - } - } - - CallOp newCallOp = rewriter.create(loc, callOp.getCallee(), - newResultTypes, newOperands); - - // Build a replacing value for each result to replace its uses. If a result - // has multiple mapping values, it needs to be packed to a single value. - OpBuilder nextBuilder(callOp.getOperation()->getNextNode()); - SmallVector replacedValues; - replacedValues.reserve(callOp.getNumResults()); - for (unsigned i = 0, e = callOp.getNumResults(); i < e; ++i) { - SmallVector valuesToPack; - mappings[i].getMappingValues(newCallOp.getResults(), valuesToPack); - if (valuesToPack.empty()) { - // No replacement is required. - replacedValues.push_back(nullptr); - } else if (valuesToPack.size() == 1) { - replacedValues.push_back(valuesToPack.front()); - } else { - // Values need to be packed using callback function. The same callback - // that is used for materializeArgumentConversion is used for packing. - Value packed = converter->materializeArgumentConversion( - nextBuilder, loc, callOp.getType(i), valuesToPack); - replacedValues.push_back(packed); - } - } - rewriter.replaceOp(callOp, replacedValues); - return success(); +/// Checks if `type` has been converted from non-memref type to memref. 
+bool BufferAssignmentTypeConverter::isConvertedMemref(Type type, Type before) { + return type.isa() && !before.isa(); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir index e1dacdf0184e2..084ac38af6e32 100644 --- a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir @@ -111,73 +111,7 @@ func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> { // CHECK: %[[Y:.*]]:2 = call @callee(%[[X]]#0) // CHECK: return %[[Y]]#0 -// ----- - -// Test case: Testing BufferAssginmnetCallOpConverter to see if it matches with the -// signature of the new signature of the callee function when there are tuple typed -// args and results. BufferAssginmentTypeConverter is set to flatten tuple typed -// arguments. The tuple typed values should be decomposed and composed using -// get_tuple_element and make_tuple operations of test dialect. Tensor types are -// converted to Memref. Memref typed function results remain as function results. 
-// CHECK-LABEL: func @callee -func @callee(%arg0: tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>){ - return %arg0 : tuple,i1, tensor<5xf32>> -} -// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>) -// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} -// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]] -// CHECK-LABEL: func @caller -func @caller(%arg0: tuple,i1, tensor<5xf32>>) -> tuple,i1, tensor<5xf32>>{ - %x0 = call @callee(%arg0) : (tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>) - %y0 = call @callee(%x0) : (tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>) - return %y0 : tuple,i1, tensor<5xf32>> -} -// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>) -// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -// CHECK-NEXT: %[[ARG_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 2 : i32} -// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]) -// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>) -// CHECK-NEXT: %[[RESULT_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 0 : i32} -// 
CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 2 : i32} -// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]) -// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>) -// CHECK-NEXT: %[[RETURN_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 2 : i32} -// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]] -// ----- -// Test case: Testing BufferAssginmnetFuncOpConverter and -// BufferAssginmentReturnOpConverter to see if the return operation matches with -// the new function signature when there are tuple typed args and results. -// BufferAssginmentTypeConverter is set to flatten tuple typed arguments. The tuple -// typed values should be decomposed and composed using get_tuple_element and -// make_tuple operations of test dialect. Tensor types are converted to Memref. -// Memref typed function results remain as function results. 
- -// CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results -func @decompose_tuple_typed_function_args_and_results(%arg0: tuple, %arg1: tensor<10xf32>, %arg2: tuple>) -> (tuple>, tensor<10xf32>, tuple){ - return %arg2, %arg1, %arg0 : tuple>, tensor<10xf32>, tuple -} -// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32> -// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, i1, f32) -// CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]]) -// CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]]) -// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[SECOND_TUPLE_SECOND_ELEM]], %[[ARG2]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]] diff --git a/mlir/test/Transforms/buffer-placement-preparation.mlir b/mlir/test/Transforms/buffer-placement-preparation.mlir index b1cfdfd690cf6..064b0fd7e85a9 100644 --- a/mlir/test/Transforms/buffer-placement-preparation.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation.mlir @@ -285,93 +285,8 @@ func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> { // CHECK: linalg.copy(%[[Y0]], %[[CALLER_RESULT]]) // CHECK: return -// ----- - // CHECK-LABEL: func @func_with_unranked_arg func @func_with_unranked_arg(%arg0: tensor<*xf32>) { return } // CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>) - -// ----- - -// Test case: Testing BufferAssginmnetCallOpConverter to see if it matches with the -// signature of the new signature of the callee function when there are tuple 
typed -// args and results. BufferAssginmentTypeConverter is set to flatten tuple typed -// arguments. The tuple typed values should be decomposed and composed using -// get_tuple_element and make_tuple operations of test dialect. Tensor types are -// converted to Memref. Memref typed function results are appended to the function -// arguments list. - -// CHECK-LABEL: func @callee -func @callee(%arg0: tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>){ - return %arg0 : tuple,i1, tensor<5xf32>> -} -// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, %[[RESULT1:.*]]: memref<5xf32>) -// CHECK-SAME: i1 -// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} -// CHECK-NEXT: linalg.copy(%[[FIRST_ELEM]], %[[RESULT0]]) -// CHECK-NEXT: linalg.copy(%[[THIRD_ELEM]], %[[RESULT1]]) -// CHECK-NEXT: return %[[SECOND_ELEM]] - - -// CHECK-LABEL: func @caller -func @caller(%arg0: tuple,i1, tensor<5xf32>>) -> tuple,i1, tensor<5xf32>>{ - %x0 = call @callee(%arg0) : (tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>) - %y0 = call @callee(%x0) : (tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>) - return %y0 : tuple,i1, tensor<5xf32>> -} -// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, %[[RESULT1:.*]]: memref<5xf32>) -// CHECK-SAME: i1 -// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: 
%[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} -// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() -// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc() -// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]) -// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1 -// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], %[[CALLEE_RESULT]], %[[SECOND_ALLOC]]) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} -// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() -// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc() -// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]) -// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1 -// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], %[[CALLEE_RESULT]], %[[SECOND_ALLOC]]) -// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} -// CHECK-NEXT: linalg.copy(%[[FIRST_ELEM]], %[[RESULT0]]) -// CHECK-NEXT: linalg.copy(%[[THIRD_ELEM]], %[[RESULT1]]) -// CHECK-NEXT: return %[[SECOND_ELEM]] - -// ----- - -// Test case: Testing BufferAssginmnetFuncOpConverter and -// BufferAssginmentReturnOpConverter to see if the return operation matches with -// the new function signature when there are tuple typed args and results. -// BufferAssginmentTypeConverter is set to flatten tuple typed arguments. 
The tuple -// typed values should be decomposed and composed using get_tuple_element and -// make_tuple operations of test dialect. Tensor types are converted to Memref. -// Memref typed function results are appended to the function arguments list. - -// CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results -func @decompose_tuple_typed_function_args_and_results(%arg0: tuple, %arg1: tensor<10xf32>, %arg2: tuple>) -> (tuple>, tensor<10xf32>, tuple){ - return %arg2, %arg1, %arg0 : tuple>, tensor<10xf32>, tuple -} -// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<5xf32>, %[[RESULT1:.*]]: memref<10xf32> -// CHECK-SAME: (i1, i1, f32) -// CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]]) -// CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]]) -// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32} -// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32} -// CHECK-NEXT: linalg.copy(%[[SECOND_TUPLE_SECOND_ELEM]], %[[RESULT0]]) -// CHECK-NEXT: linalg.copy(%[[ARG2]], %[[RESULT1]]) -// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]] diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index f03c953396a4a..bc26a8659831d 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -1669,7 +1669,7 @@ def TableGenBuildOp5 : TEST_Op<"tblgen_build_5", let results = (outs AnyType:$result); let extraClassDeclaration = [{ - static LogicalResult 
inferReturnTypes(MLIRContext *, + static LogicalResult inferReturnTypes(MLIRContext *, Optional location, ValueRange operands, DictionaryAttr attributes, RegionRange regions, SmallVectorImpl &inferredReturnTypes) { @@ -1679,31 +1679,4 @@ def TableGenBuildOp5 : TEST_Op<"tblgen_build_5", }]; } -//===----------------------------------------------------------------------===// -// Test BufferPlacement -//===----------------------------------------------------------------------===// - -def GetTupleElementOp: TEST_Op<"get_tuple_element"> { - let description = [{ - Test op that returns a specified element of the tuple. - }]; - - let arguments = (ins - TupleOf<[AnyType]>, - I32Attr:$index - ); - let results = (outs AnyType); -} - -def MakeTupleOp: TEST_Op<"make_tuple"> { - let description = [{ - Test op that creates a tuple value from a list of values. - }]; - - let arguments = (ins - Variadic:$inputs - ); - let results = (outs TupleOf<[AnyType]>); -} - #endif // TEST_OPS diff --git a/mlir/test/lib/Transforms/TestBufferPlacement.cpp b/mlir/test/lib/Transforms/TestBufferPlacement.cpp index 14b72b9fc92a0..6cc0924191cb8 100644 --- a/mlir/test/lib/Transforms/TestBufferPlacement.cpp +++ b/mlir/test/lib/Transforms/TestBufferPlacement.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "TestDialect.h" -#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/IR/Function.h" #include "mlir/IR/Operation.h" @@ -111,16 +109,14 @@ struct TestBufferPlacementPreparationPass void populateTensorLinalgToBufferLinalgConversionPattern( MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, - OwningRewritePatternList *patterns) { + TypeConverter *converter, OwningRewritePatternList *patterns) { populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, placer, - 
converter, patterns); + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp, + allowMemrefFunctionResults>(context, placer, converter, patterns); patterns->insert(context, placer, converter); } void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); registry.insert(); } @@ -131,8 +127,6 @@ struct TestBufferPlacementPreparationPass // Mark all Standard operations legal. target.addLegalDialect(); - target.addLegalOp(); - target.addLegalOp(); // Mark all Linalg operations illegal as long as they work on tensors. auto isLegalOperation = [&](Operation *op) { @@ -155,42 +149,6 @@ struct TestBufferPlacementPreparationPass converter.isLegal(&funcOp.getBody()); }); - auto kind = allowMemrefFunctionResults - ? BufferAssignmentTypeConverter::KeepAsFunctionResult - : BufferAssignmentTypeConverter::AppendToArgumentsList; - converter.setResultConversionKind(kind); - converter.setResultConversionKind( - kind); - - converter.addDecomposeTypeConversion( - [](TupleType tupleType, SmallVectorImpl &types) { - tupleType.getFlattenedTypes(types); - return success(); - }); - - converter.addArgumentMaterialization( - [](OpBuilder &builder, TupleType resultType, ValueRange inputs, - Location loc) -> Optional { - if (inputs.size() == 1) - return llvm::None; - TypeRange TypeRange = inputs.getTypes(); - SmallVector types(TypeRange.begin(), TypeRange.end()); - TupleType tuple = TupleType::get(types, builder.getContext()); - mlir::Value value = builder.create(loc, tuple, inputs); - return value; - }); - - converter.addDecomposeValueConversion([](OpBuilder &builder, Location loc, - TupleType resultType, Value value, - SmallVectorImpl &values) { - for (unsigned i = 0, e = resultType.size(); i < e; ++i) { - Value res = builder.create( - loc, resultType.getType(i), value, builder.getI32IntegerAttr(i)); - values.push_back(res); - } - return success(); - }); - // Walk over all the functions to apply buffer assignment. 
this->getOperation().walk([&](FuncOp function) -> WalkResult { OwningRewritePatternList patterns; From 255a60cdd6fdf564bcca645b67ea2d1fb127c9ce Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 1 Sep 2020 17:13:24 -0400 Subject: [PATCH 006/465] [libc++] Make some testing utilities constexpr This will be needed in order to test constexpr std::vector. --- libcxx/test/support/emplace_constructible.h | 20 +-- libcxx/test/support/min_allocator.h | 146 ++++++++++---------- 2 files changed, 83 insertions(+), 83 deletions(-) diff --git a/libcxx/test/support/emplace_constructible.h b/libcxx/test/support/emplace_constructible.h index f0d11ba76c87d..42a62fabe656d 100644 --- a/libcxx/test/support/emplace_constructible.h +++ b/libcxx/test/support/emplace_constructible.h @@ -7,7 +7,7 @@ template struct EmplaceConstructible { T value; - explicit EmplaceConstructible(T xvalue) : value(xvalue) {} + TEST_CONSTEXPR_CXX14 explicit EmplaceConstructible(T xvalue) : value(xvalue) {} EmplaceConstructible(EmplaceConstructible const&) = delete; }; @@ -15,9 +15,9 @@ template struct EmplaceConstructibleAndMoveInsertable { int copied = 0; T value; - explicit EmplaceConstructibleAndMoveInsertable(T xvalue) : value(xvalue) {} + TEST_CONSTEXPR_CXX14 explicit EmplaceConstructibleAndMoveInsertable(T xvalue) : value(xvalue) {} - EmplaceConstructibleAndMoveInsertable( + TEST_CONSTEXPR_CXX14 EmplaceConstructibleAndMoveInsertable( EmplaceConstructibleAndMoveInsertable&& Other) : copied(Other.copied + 1), value(std::move(Other.value)) {} }; @@ -27,13 +27,13 @@ struct EmplaceConstructibleAndMoveable { int copied = 0; int assigned = 0; T value; - explicit EmplaceConstructibleAndMoveable(T xvalue) noexcept : value(xvalue) {} + TEST_CONSTEXPR_CXX14 explicit EmplaceConstructibleAndMoveable(T xvalue) noexcept : value(xvalue) {} - EmplaceConstructibleAndMoveable(EmplaceConstructibleAndMoveable&& Other) + TEST_CONSTEXPR_CXX14 EmplaceConstructibleAndMoveable(EmplaceConstructibleAndMoveable&& Other) noexcept 
: copied(Other.copied + 1), value(std::move(Other.value)) {} - EmplaceConstructibleAndMoveable& + TEST_CONSTEXPR_CXX14 EmplaceConstructibleAndMoveable& operator=(EmplaceConstructibleAndMoveable&& Other) noexcept { copied = Other.copied; assigned = Other.assigned + 1; @@ -47,15 +47,15 @@ struct EmplaceConstructibleMoveableAndAssignable { int copied = 0; int assigned = 0; T value; - explicit EmplaceConstructibleMoveableAndAssignable(T xvalue) noexcept + TEST_CONSTEXPR_CXX14 explicit EmplaceConstructibleMoveableAndAssignable(T xvalue) noexcept : value(xvalue) {} - EmplaceConstructibleMoveableAndAssignable( + TEST_CONSTEXPR_CXX14 EmplaceConstructibleMoveableAndAssignable( EmplaceConstructibleMoveableAndAssignable&& Other) noexcept : copied(Other.copied + 1), value(std::move(Other.value)) {} - EmplaceConstructibleMoveableAndAssignable& + TEST_CONSTEXPR_CXX14 EmplaceConstructibleMoveableAndAssignable& operator=(EmplaceConstructibleMoveableAndAssignable&& Other) noexcept { copied = Other.copied; assigned = Other.assigned + 1; @@ -63,7 +63,7 @@ struct EmplaceConstructibleMoveableAndAssignable { return *this; } - EmplaceConstructibleMoveableAndAssignable& operator=(T xvalue) { + TEST_CONSTEXPR_CXX14 EmplaceConstructibleMoveableAndAssignable& operator=(T xvalue) { value = std::move(xvalue); ++assigned; return *this; diff --git a/libcxx/test/support/min_allocator.h b/libcxx/test/support/min_allocator.h index fd23fc4383f34..b9f6f6147609c 100644 --- a/libcxx/test/support/min_allocator.h +++ b/libcxx/test/support/min_allocator.h @@ -220,19 +220,19 @@ class min_pointer void* ptr_; public: min_pointer() TEST_NOEXCEPT = default; - min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} + TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} template ::value >::type > - min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(p.ptr_) {} + TEST_CONSTEXPR_CXX14 min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(p.ptr_) {} - explicit operator bool() const {return 
ptr_ != nullptr;} + TEST_CONSTEXPR_CXX14 explicit operator bool() const {return ptr_ != nullptr;} - friend bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} - friend bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} + TEST_CONSTEXPR_CXX14 friend bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} + TEST_CONSTEXPR_CXX14 friend bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} template friend class min_pointer; }; @@ -241,13 +241,13 @@ class min_pointer { T* ptr_; - explicit min_pointer(T* p) TEST_NOEXCEPT : ptr_(p) {} + TEST_CONSTEXPR_CXX14 explicit min_pointer(T* p) TEST_NOEXCEPT : ptr_(p) {} public: min_pointer() TEST_NOEXCEPT = default; - min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} - explicit min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(static_cast(p.ptr_)) {} + TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} + TEST_CONSTEXPR_CXX14 explicit min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(static_cast(p.ptr_)) {} - explicit operator bool() const {return ptr_ != nullptr;} + TEST_CONSTEXPR_CXX14 explicit operator bool() const {return ptr_ != nullptr;} typedef std::ptrdiff_t difference_type; typedef T& reference; @@ -255,53 +255,53 @@ class min_pointer typedef T value_type; typedef std::random_access_iterator_tag iterator_category; - reference operator*() const {return *ptr_;} - pointer operator->() const {return ptr_;} + TEST_CONSTEXPR_CXX14 reference operator*() const {return *ptr_;} + TEST_CONSTEXPR_CXX14 pointer operator->() const {return ptr_;} - min_pointer& operator++() {++ptr_; return *this;} - min_pointer operator++(int) {min_pointer tmp(*this); ++ptr_; return tmp;} + TEST_CONSTEXPR_CXX14 min_pointer& operator++() {++ptr_; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer operator++(int) {min_pointer tmp(*this); ++ptr_; return tmp;} - min_pointer& operator--() {--ptr_; return *this;} - min_pointer operator--(int) {min_pointer 
tmp(*this); --ptr_; return tmp;} + TEST_CONSTEXPR_CXX14 min_pointer& operator--() {--ptr_; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer operator--(int) {min_pointer tmp(*this); --ptr_; return tmp;} - min_pointer& operator+=(difference_type n) {ptr_ += n; return *this;} - min_pointer& operator-=(difference_type n) {ptr_ -= n; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer& operator+=(difference_type n) {ptr_ += n; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer& operator-=(difference_type n) {ptr_ -= n; return *this;} - min_pointer operator+(difference_type n) const + TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n) const { min_pointer tmp(*this); tmp += n; return tmp; } - friend min_pointer operator+(difference_type n, min_pointer x) + friend TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n, min_pointer x) { return x + n; } - min_pointer operator-(difference_type n) const + TEST_CONSTEXPR_CXX14 min_pointer operator-(difference_type n) const { min_pointer tmp(*this); tmp -= n; return tmp; } - friend difference_type operator-(min_pointer x, min_pointer y) + friend TEST_CONSTEXPR_CXX14 difference_type operator-(min_pointer x, min_pointer y) { return x.ptr_ - y.ptr_; } - reference operator[](difference_type n) const {return ptr_[n];} + TEST_CONSTEXPR_CXX14 reference operator[](difference_type n) const {return ptr_[n];} - friend bool operator< (min_pointer x, min_pointer y) {return x.ptr_ < y.ptr_;} - friend bool operator> (min_pointer x, min_pointer y) {return y < x;} - friend bool operator<=(min_pointer x, min_pointer y) {return !(y < x);} - friend bool operator>=(min_pointer x, min_pointer y) {return !(x < y);} + friend TEST_CONSTEXPR_CXX14 bool operator< (min_pointer x, min_pointer y) {return x.ptr_ < y.ptr_;} + friend TEST_CONSTEXPR_CXX14 bool operator> (min_pointer x, min_pointer y) {return y < x;} + friend TEST_CONSTEXPR_CXX14 bool operator<=(min_pointer x, min_pointer y) {return !(y < x);} + friend TEST_CONSTEXPR_CXX14 bool 
operator>=(min_pointer x, min_pointer y) {return !(x < y);} - static min_pointer pointer_to(T& t) {return min_pointer(std::addressof(t));} + static TEST_CONSTEXPR_CXX14 min_pointer pointer_to(T& t) {return min_pointer(std::addressof(t));} - friend bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} - friend bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} + friend TEST_CONSTEXPR_CXX14 bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} + friend TEST_CONSTEXPR_CXX14 bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} template friend class min_pointer; template friend class min_allocator; }; @@ -311,14 +311,14 @@ class min_pointer { const T* ptr_; - explicit min_pointer(const T* p) : ptr_(p) {} + TEST_CONSTEXPR_CXX14 explicit min_pointer(const T* p) : ptr_(p) {} public: min_pointer() TEST_NOEXCEPT = default; - min_pointer(std::nullptr_t) : ptr_(nullptr) {} - min_pointer(min_pointer p) : ptr_(p.ptr_) {} - explicit min_pointer(min_pointer p) : ptr_(static_cast(p.ptr_)) {} + TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) : ptr_(nullptr) {} + TEST_CONSTEXPR_CXX14 min_pointer(min_pointer p) : ptr_(p.ptr_) {} + TEST_CONSTEXPR_CXX14 explicit min_pointer(min_pointer p) : ptr_(static_cast(p.ptr_)) {} - explicit operator bool() const {return ptr_ != nullptr;} + TEST_CONSTEXPR_CXX14 explicit operator bool() const {return ptr_ != nullptr;} typedef std::ptrdiff_t difference_type; typedef const T& reference; @@ -326,58 +326,58 @@ class min_pointer typedef const T value_type; typedef std::random_access_iterator_tag iterator_category; - reference operator*() const {return *ptr_;} - pointer operator->() const {return ptr_;} + TEST_CONSTEXPR_CXX14 reference operator*() const {return *ptr_;} + TEST_CONSTEXPR_CXX14 pointer operator->() const {return ptr_;} - min_pointer& operator++() {++ptr_; return *this;} - min_pointer operator++(int) {min_pointer tmp(*this); ++ptr_; return tmp;} + TEST_CONSTEXPR_CXX14 
min_pointer& operator++() {++ptr_; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer operator++(int) {min_pointer tmp(*this); ++ptr_; return tmp;} - min_pointer& operator--() {--ptr_; return *this;} - min_pointer operator--(int) {min_pointer tmp(*this); --ptr_; return tmp;} + TEST_CONSTEXPR_CXX14 min_pointer& operator--() {--ptr_; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer operator--(int) {min_pointer tmp(*this); --ptr_; return tmp;} - min_pointer& operator+=(difference_type n) {ptr_ += n; return *this;} - min_pointer& operator-=(difference_type n) {ptr_ -= n; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer& operator+=(difference_type n) {ptr_ += n; return *this;} + TEST_CONSTEXPR_CXX14 min_pointer& operator-=(difference_type n) {ptr_ -= n; return *this;} - min_pointer operator+(difference_type n) const + TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n) const { min_pointer tmp(*this); tmp += n; return tmp; } - friend min_pointer operator+(difference_type n, min_pointer x) + friend TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n, min_pointer x) { return x + n; } - min_pointer operator-(difference_type n) const + TEST_CONSTEXPR_CXX14 min_pointer operator-(difference_type n) const { min_pointer tmp(*this); tmp -= n; return tmp; } - friend difference_type operator-(min_pointer x, min_pointer y) + friend TEST_CONSTEXPR_CXX14 difference_type operator-(min_pointer x, min_pointer y) { return x.ptr_ - y.ptr_; } - reference operator[](difference_type n) const {return ptr_[n];} + TEST_CONSTEXPR_CXX14 reference operator[](difference_type n) const {return ptr_[n];} - friend bool operator< (min_pointer x, min_pointer y) {return x.ptr_ < y.ptr_;} - friend bool operator> (min_pointer x, min_pointer y) {return y < x;} - friend bool operator<=(min_pointer x, min_pointer y) {return !(y < x);} - friend bool operator>=(min_pointer x, min_pointer y) {return !(x < y);} + friend TEST_CONSTEXPR_CXX14 bool operator< (min_pointer x, min_pointer y) {return 
x.ptr_ < y.ptr_;} + friend TEST_CONSTEXPR_CXX14 bool operator> (min_pointer x, min_pointer y) {return y < x;} + friend TEST_CONSTEXPR_CXX14 bool operator<=(min_pointer x, min_pointer y) {return !(y < x);} + friend TEST_CONSTEXPR_CXX14 bool operator>=(min_pointer x, min_pointer y) {return !(x < y);} - static min_pointer pointer_to(const T& t) {return min_pointer(std::addressof(t));} + static TEST_CONSTEXPR_CXX14 min_pointer pointer_to(const T& t) {return min_pointer(std::addressof(t));} - friend bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} - friend bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} + friend TEST_CONSTEXPR_CXX14 bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} + friend TEST_CONSTEXPR_CXX14 bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} template friend class min_pointer; }; template -inline +TEST_CONSTEXPR_CXX14 inline bool operator==(min_pointer x, std::nullptr_t) { @@ -385,7 +385,7 @@ operator==(min_pointer x, std::nullptr_t) } template -inline +TEST_CONSTEXPR_CXX14 inline bool operator==(std::nullptr_t, min_pointer x) { @@ -393,7 +393,7 @@ operator==(std::nullptr_t, min_pointer x) } template -inline +TEST_CONSTEXPR_CXX14 inline bool operator!=(min_pointer x, std::nullptr_t) { @@ -401,7 +401,7 @@ operator!=(min_pointer x, std::nullptr_t) } template -inline +TEST_CONSTEXPR_CXX14 inline bool operator!=(std::nullptr_t, min_pointer x) { @@ -417,20 +417,20 @@ class min_allocator min_allocator() = default; template - min_allocator(min_allocator) {} + TEST_CONSTEXPR_CXX20 min_allocator(min_allocator) {} - pointer allocate(std::ptrdiff_t n) + TEST_CONSTEXPR_CXX20 pointer allocate(std::ptrdiff_t n) { - return pointer(static_cast(::operator new(n*sizeof(T)))); + return pointer(std::allocator().allocate(n)); } - void deallocate(pointer p, std::ptrdiff_t) + TEST_CONSTEXPR_CXX20 void deallocate(pointer p, std::ptrdiff_t n) { - return ::operator delete(p.ptr_); + 
std::allocator().deallocate(p.ptr_, n); } - friend bool operator==(min_allocator, min_allocator) {return true;} - friend bool operator!=(min_allocator x, min_allocator y) {return !(x == y);} + TEST_CONSTEXPR_CXX20 friend bool operator==(min_allocator, min_allocator) {return true;} + TEST_CONSTEXPR_CXX20 friend bool operator!=(min_allocator x, min_allocator y) {return !(x == y);} }; template @@ -439,23 +439,23 @@ class explicit_allocator public: typedef T value_type; - explicit_allocator() TEST_NOEXCEPT {} + TEST_CONSTEXPR_CXX20 explicit_allocator() TEST_NOEXCEPT {} template - explicit explicit_allocator(explicit_allocator) TEST_NOEXCEPT {} + TEST_CONSTEXPR_CXX20 explicit explicit_allocator(explicit_allocator) TEST_NOEXCEPT {} - T* allocate(std::size_t n) + TEST_CONSTEXPR_CXX20 T* allocate(std::size_t n) { - return static_cast(::operator new(n*sizeof(T))); + return static_cast(std::allocator().allocate(n)); } - void deallocate(T* p, std::size_t) + TEST_CONSTEXPR_CXX20 void deallocate(T* p, std::size_t n) { - return ::operator delete(static_cast(p)); + std::allocator().deallocate(p, n); } - friend bool operator==(explicit_allocator, explicit_allocator) {return true;} - friend bool operator!=(explicit_allocator x, explicit_allocator y) {return !(x == y);} + TEST_CONSTEXPR_CXX20 friend bool operator==(explicit_allocator, explicit_allocator) {return true;} + TEST_CONSTEXPR_CXX20 friend bool operator!=(explicit_allocator x, explicit_allocator y) {return !(x == y);} }; #endif // MIN_ALLOCATOR_H From 6f7737c46811993c0ed9b9143cbe4cb49dcf1d03 Mon Sep 17 00:00:00 2001 From: Anna Thomas Date: Wed, 2 Sep 2020 10:06:27 -0400 Subject: [PATCH 007/465] [ImplicitNullChecks] NFC: Separated out checks and added comments Separated out some checks in isSuitableMemoryOp and added comments explaining why some of those checks are done. Tests-Run:X86 implicit null checks tests. 
--- llvm/lib/CodeGen/ImplicitNullChecks.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 16c9bfc672af5..c6b1eeb3408c1 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -368,18 +368,26 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, const MachineOperand *BaseOp; - if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) || - !BaseOp->isReg() || BaseOp->getReg() != PointerReg) + // FIXME: This handles only simple addressing mode. + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) + return SR_Unsuitable; + + // We need the base of the memory instruction to be same as the register + // where the null check is performed (i.e. PointerReg). + if (!BaseOp->isReg() || BaseOp->getReg() != PointerReg) return SR_Unsuitable; - // FIXME: This algorithm assumes instructions have fixed-size offsets. + // Scalable offsets are a part of scalable vectors (SVE for AArch64). That + // target is in-practice unsupported for ImplicitNullChecks. if (OffsetIsScalable) return SR_Unsuitable; + if (!MI.mayLoadOrStore() || MI.isPredicable()) + return SR_Unsuitable; + // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. - if (!(MI.mayLoadOrStore() && !MI.isPredicable() && - -PageSize < Offset && Offset < PageSize)) + if (!(-PageSize < Offset && Offset < PageSize)) return SR_Unsuitable; // Finally, check whether the current memory access aliases with previous one. 
From 425573a2fa2dc5666273944a584acdb286447b66 Mon Sep 17 00:00:00 2001 From: Anna Thomas Date: Wed, 2 Sep 2020 10:19:10 -0400 Subject: [PATCH 008/465] [ImplicitNullChecks] NFC: Refactor dependence safety check After computing dependence, we check if it is safe to hoist by identifying if it clobbers any liveIns in the sibling block (NullSucc). This check is moved to its own function which will be used in the soon-to-be modified dependence checking algorithm for implicit null checks pass. Tests-Run: lit tests on X86/implicit-* --- llvm/lib/CodeGen/ImplicitNullChecks.cpp | 71 +++++++++++++++---------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index c6b1eeb3408c1..dc1b0a867b0d6 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -200,6 +200,13 @@ class ImplicitNullChecks : public MachineFunctionPass { unsigned PointerReg, ArrayRef PrevInsts); + /// Returns true if \p DependenceMI can clobber the liveIns in NullSucc block + /// if it was hoisted to the NullCheck block. This is used by caller + /// canHoistInst to decide if DependenceMI can be hoisted safely. + bool canDependenceHoistingClobberLiveIns(MachineInstr *DependenceMI, + MachineBasicBlock *NullSucc, + unsigned PointerReg); + /// Return true if \p FaultingMI can be hoisted from after the /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a /// non-null value if we also need to (and legally can) hoist a depedency. 
@@ -401,32 +408,9 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, return SR_Suitable; } -bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, - unsigned PointerReg, - ArrayRef InstsSeenSoFar, - MachineBasicBlock *NullSucc, - MachineInstr *&Dependence) { - auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar); - if (!DepResult.CanReorder) - return false; - - if (!DepResult.PotentialDependence) { - Dependence = nullptr; - return true; - } - - auto DependenceItr = *DepResult.PotentialDependence; - auto *DependenceMI = *DependenceItr; - - // We don't want to reason about speculating loads. Note -- at this point - // we should have already filtered out all of the other non-speculatable - // things, like calls and stores. - // We also do not want to hoist stores because it might change the memory - // while the FaultingMI may result in faulting. - assert(canHandle(DependenceMI) && "Should never have reached here!"); - if (DependenceMI->mayLoadOrStore()) - return false; - +bool ImplicitNullChecks::canDependenceHoistingClobberLiveIns( + MachineInstr *DependenceMI, MachineBasicBlock *NullSucc, + unsigned PointerReg) { for (auto &DependenceMO : DependenceMI->operands()) { if (!(DependenceMO.isReg() && DependenceMO.getReg())) continue; @@ -449,7 +433,7 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, // same as it would have been had the load not have executed and we'd have // branched to NullSucc directly. if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg())) - return false; + return true; // The Dependency can't be re-defining the base register -- then we won't // get the memory operation on the address we want. This is already @@ -459,6 +443,39 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, "Should have been checked before!"); } + // The dependence does not clobber live-ins in NullSucc block. 
+ return false; +} + +bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, + unsigned PointerReg, + ArrayRef InstsSeenSoFar, + MachineBasicBlock *NullSucc, + MachineInstr *&Dependence) { + auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar); + if (!DepResult.CanReorder) + return false; + + if (!DepResult.PotentialDependence) { + Dependence = nullptr; + return true; + } + + auto DependenceItr = *DepResult.PotentialDependence; + auto *DependenceMI = *DependenceItr; + + // We don't want to reason about speculating loads. Note -- at this point + // we should have already filtered out all of the other non-speculatable + // things, like calls and stores. + // We also do not want to hoist stores because it might change the memory + // while the FaultingMI may result in faulting. + assert(canHandle(DependenceMI) && "Should never have reached here!"); + if (DependenceMI->mayLoadOrStore()) + return false; + + if (canDependenceHoistingClobberLiveIns(DependenceMI, NullSucc, PointerReg)) + return false; + auto DepDepResult = computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr}); From addb5148f58d710fcaba04bb2afec8006ae8ac15 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Fri, 28 Aug 2020 15:38:39 +0200 Subject: [PATCH 009/465] [lldb/Target] Add custom interpreter option to `platform shell` This patch adds the ability to use a custom interpreter with the `platform shell` command. If the user set the `-s|--shell` option with the path to a binary, lldb passes it down to the platform's `RunShellProcess` method and set it as the shell to use in `ProcessLaunchInfo to run commands. Note that not all the Platforms support running shell commands with custom interpreters (i.e. RemoteGDBServer is only expected to use the default shell). This patch also makes some refactoring and cleanups, like swapping CString for StringRef when possible and updating `SBPlatformShellCommand` with new methods and a new constructor. 
rdar://67759256 Differential Revision: https://reviews.llvm.org/D86667 Signed-off-by: Med Ismail Bennani --- lldb/bindings/interface/SBPlatform.i | 7 +++ lldb/include/lldb/API/SBPlatform.h | 5 ++ lldb/include/lldb/Host/Host.h | 41 ++++++++++++++-- lldb/include/lldb/Target/Platform.h | 13 ++++- .../include/lldb/Target/RemoteAwarePlatform.h | 7 ++- lldb/source/API/SBPlatform.cpp | 47 +++++++++++++++++-- .../source/Commands/CommandObjectPlatform.cpp | 18 ++++++- lldb/source/Commands/Options.td | 2 + lldb/source/Host/common/Host.cpp | 45 +++++++++++++----- lldb/source/Host/macosx/objcxx/Host.mm | 8 ++-- .../gdb-server/PlatformRemoteGDBServer.cpp | 2 +- .../gdb-server/PlatformRemoteGDBServer.h | 2 +- .../GDBRemoteCommunicationClient.cpp | 4 +- .../gdb-remote/GDBRemoteCommunicationClient.h | 2 +- lldb/source/Target/Platform.cpp | 22 +++++++-- lldb/source/Target/RemoteAwarePlatform.cpp | 19 ++++++-- .../test/API/commands/platform/basic/Makefile | 5 ++ .../platform/basic/TestPlatformCommand.py | 9 ++++ .../platform/basic/TestPlatformPython.py | 17 +++++++ .../API/commands/platform/basic/myshell.c | 24 ++++++++++ 20 files changed, 258 insertions(+), 41 deletions(-) create mode 100644 lldb/test/API/commands/platform/basic/Makefile create mode 100644 lldb/test/API/commands/platform/basic/myshell.c diff --git a/lldb/bindings/interface/SBPlatform.i b/lldb/bindings/interface/SBPlatform.i index 81945222c059a..07aecfc354bb2 100644 --- a/lldb/bindings/interface/SBPlatform.i +++ b/lldb/bindings/interface/SBPlatform.i @@ -45,6 +45,7 @@ public: class SBPlatformShellCommand { public: + SBPlatformShellCommand (const char *shell, const char *shell_command); SBPlatformShellCommand (const char *shell_command); SBPlatformShellCommand (const SBPlatformShellCommand &rhs); @@ -54,6 +55,12 @@ public: void Clear(); + const char * + GetShell(); + + void + SetShell(const char *shell_interpreter); + const char * GetCommand(); diff --git a/lldb/include/lldb/API/SBPlatform.h 
b/lldb/include/lldb/API/SBPlatform.h index 4d251b1299546..98291f18247dc 100644 --- a/lldb/include/lldb/API/SBPlatform.h +++ b/lldb/include/lldb/API/SBPlatform.h @@ -51,6 +51,7 @@ class LLDB_API SBPlatformConnectOptions { class LLDB_API SBPlatformShellCommand { public: + SBPlatformShellCommand(const char *shell, const char *shell_command); SBPlatformShellCommand(const char *shell_command); SBPlatformShellCommand(const SBPlatformShellCommand &rhs); @@ -61,6 +62,10 @@ class LLDB_API SBPlatformShellCommand { void Clear(); + const char *GetShell(); + + void SetShell(const char *shell); + const char *GetCommand(); void SetCommand(const char *shell_command); diff --git a/lldb/include/lldb/Host/Host.h b/lldb/include/lldb/Host/Host.h index f19cb85d2329c..76792cc6eab56 100644 --- a/lldb/include/lldb/Host/Host.h +++ b/lldb/include/lldb/Host/Host.h @@ -196,19 +196,34 @@ class Host { static Status ShellExpandArguments(ProcessLaunchInfo &launch_info); /// Run a shell command. - /// \arg command shouldn't be NULL + /// \arg command shouldn't be empty /// \arg working_dir Pass empty FileSpec to use the current working directory /// \arg status_ptr Pass NULL if you don't want the process exit status /// \arg signo_ptr Pass NULL if you don't want the signal that caused the /// process to exit /// \arg command_output Pass NULL if you don't want the command output /// \arg hide_stderr if this is false, redirect stderr to stdout - /// TODO: Convert this function to take a StringRef. - static Status RunShellCommand(const char *command, + static Status RunShellCommand(llvm::StringRef command, const FileSpec &working_dir, int *status_ptr, int *signo_ptr, std::string *command_output, const Timeout &timeout, - bool run_in_default_shell = true, + bool run_in_shell = true, + bool hide_stderr = false); + + /// Run a shell command. 
+ /// \arg shell Pass an empty string if you want to use the default shell + /// interpreter \arg command \arg working_dir Pass empty FileSpec to use the + /// current working directory \arg status_ptr Pass NULL if you don't want + /// the process exit status \arg signo_ptr Pass NULL if you don't want the + /// signal that caused + /// the process to exit + /// \arg command_output Pass NULL if you don't want the command output + /// \arg hide_stderr If this is \b false, redirect stderr to stdout + static Status RunShellCommand(llvm::StringRef shell, llvm::StringRef command, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output, + const Timeout &timeout, + bool run_in_shell = true, bool hide_stderr = false); /// Run a shell command. @@ -222,7 +237,23 @@ class Host { int *status_ptr, int *signo_ptr, std::string *command_output, const Timeout &timeout, - bool run_in_default_shell = true, + bool run_in_shell = true, + bool hide_stderr = false); + + /// Run a shell command. 
+ /// \arg shell Pass an empty string if you want to use the default + /// shell interpreter \arg command \arg working_dir Pass empty FileSpec to use + /// the current working directory \arg status_ptr Pass NULL if you don't + /// want the process exit status \arg signo_ptr Pass NULL if you don't + /// want the signal that caused the + /// process to exit + /// \arg command_output Pass NULL if you don't want the command output + /// \arg hide_stderr If this is \b false, redirect stderr to stdout + static Status RunShellCommand(llvm::StringRef shell, const Args &args, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output, + const Timeout &timeout, + bool run_in_shell = true, bool hide_stderr = false); static bool OpenFileInExternalEditor(const FileSpec &file_spec, diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 9335f73b37df1..64b49ecca6061 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -621,7 +621,18 @@ class Platform : public PluginInterface { } virtual lldb_private::Status RunShellCommand( - const char *command, // Shouldn't be nullptr + llvm::StringRef command, + const FileSpec &working_dir, // Pass empty FileSpec to use the current + // working directory + int *status_ptr, // Pass nullptr if you don't want the process exit status + int *signo_ptr, // Pass nullptr if you don't want the signal that caused + // the process to exit + std::string + *command_output, // Pass nullptr if you don't want the command output + const Timeout &timeout); + + virtual lldb_private::Status RunShellCommand( + llvm::StringRef shell, llvm::StringRef command, const FileSpec &working_dir, // Pass empty FileSpec to use the current // working directory int *status_ptr, // Pass nullptr if you don't want the process exit status diff --git a/lldb/include/lldb/Target/RemoteAwarePlatform.h b/lldb/include/lldb/Target/RemoteAwarePlatform.h index 
5741dbe027b70..6d6ac99c093fa 100644 --- a/lldb/include/lldb/Target/RemoteAwarePlatform.h +++ b/lldb/include/lldb/Target/RemoteAwarePlatform.h @@ -68,11 +68,16 @@ class RemoteAwarePlatform : public Platform { bool GetRemoteOSKernelDescription(std::string &s) override; ArchSpec GetRemoteSystemArchitecture() override; - Status RunShellCommand(const char *command, const FileSpec &working_dir, + Status RunShellCommand(llvm::StringRef command, const FileSpec &working_dir, int *status_ptr, int *signo_ptr, std::string *command_output, const Timeout &timeout) override; + Status RunShellCommand(llvm::StringRef interpreter, llvm::StringRef command, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output, + const Timeout &timeout) override; + const char *GetHostname() override; UserIDResolver &GetUserIDResolver() override; lldb_private::Environment GetEnvironment() override; diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index 7ac852488ffbb..3c6422e211fca 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -50,14 +50,25 @@ struct PlatformConnectOptions { // PlatformShellCommand struct PlatformShellCommand { - PlatformShellCommand(const char *shell_command = nullptr) + PlatformShellCommand(llvm::StringRef shell_interpreter, + llvm::StringRef shell_command) : m_command(), m_working_dir(), m_status(0), m_signo(0) { - if (shell_command && shell_command[0]) - m_command = shell_command; + if (!shell_interpreter.empty()) + m_shell = shell_interpreter.str(); + + if (!m_shell.empty() && !shell_command.empty()) + m_command = shell_command.str(); + } + + PlatformShellCommand(llvm::StringRef shell_command = llvm::StringRef()) + : m_shell(), m_command(), m_working_dir(), m_status(0), m_signo(0) { + if (!shell_command.empty()) + m_command = shell_command.str(); } ~PlatformShellCommand() = default; + std::string m_shell; std::string m_command; std::string m_working_dir; std::string m_output; 
@@ -163,6 +174,13 @@ void SBPlatformConnectOptions::SetLocalCacheDirectory(const char *path) { } // SBPlatformShellCommand +SBPlatformShellCommand::SBPlatformShellCommand(const char *shell_interpreter, + const char *shell_command) + : m_opaque_ptr(new PlatformShellCommand(shell_interpreter, shell_command)) { + LLDB_RECORD_CONSTRUCTOR(SBPlatformShellCommand, (const char *, const char *), + shell_interpreter, shell_command); +} + SBPlatformShellCommand::SBPlatformShellCommand(const char *shell_command) : m_opaque_ptr(new PlatformShellCommand(shell_command)) { LLDB_RECORD_CONSTRUCTOR(SBPlatformShellCommand, (const char *), @@ -200,6 +218,24 @@ void SBPlatformShellCommand::Clear() { m_opaque_ptr->m_signo = 0; } +const char *SBPlatformShellCommand::GetShell() { + LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetShell); + + if (m_opaque_ptr->m_shell.empty()) + return nullptr; + return m_opaque_ptr->m_shell.c_str(); +} + +void SBPlatformShellCommand::SetShell(const char *shell_interpreter) { + LLDB_RECORD_METHOD(void, SBPlatformShellCommand, SetShell, (const char *), + shell_interpreter); + + if (shell_interpreter && shell_interpreter[0]) + m_opaque_ptr->m_shell = shell_interpreter; + else + m_opaque_ptr->m_shell.clear(); +} + const char *SBPlatformShellCommand::GetCommand() { LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetCommand); @@ -557,7 +593,8 @@ SBError SBPlatform::Run(SBPlatformShellCommand &shell_command) { if (working_dir) shell_command.SetWorkingDirectory(working_dir); } - return platform_sp->RunShellCommand(command, FileSpec(working_dir), + return platform_sp->RunShellCommand(shell_command.m_opaque_ptr->m_shell, + command, FileSpec(working_dir), &shell_command.m_opaque_ptr->m_status, &shell_command.m_opaque_ptr->m_signo, &shell_command.m_opaque_ptr->m_output, @@ -699,6 +736,8 @@ void RegisterMethods(Registry &R) { SBPlatformShellCommand &, SBPlatformShellCommand, operator=,(const lldb::SBPlatformShellCommand &)); 
LLDB_REGISTER_METHOD(void, SBPlatformShellCommand, Clear, ()); + LLDB_REGISTER_METHOD(const char *, SBPlatformShellCommand, GetShell, ()); + LLDB_REGISTER_METHOD(void, SBPlatformShellCommand, SetShell, (const char *)); LLDB_REGISTER_METHOD(const char *, SBPlatformShellCommand, GetCommand, ()); LLDB_REGISTER_METHOD(void, SBPlatformShellCommand, SetCommand, (const char *)); diff --git a/lldb/source/Commands/CommandObjectPlatform.cpp b/lldb/source/Commands/CommandObjectPlatform.cpp index b5409e611f058..3a5af9f91cf16 100644 --- a/lldb/source/Commands/CommandObjectPlatform.cpp +++ b/lldb/source/Commands/CommandObjectPlatform.cpp @@ -1611,6 +1611,16 @@ class CommandObjectPlatformShell : public CommandObjectRaw { else m_timeout = std::chrono::seconds(timeout_sec); break; + case 's': { + if (option_arg.empty()) { + error.SetErrorStringWithFormat( + "missing shell interpreter path for option -s|--shell."); + return error; + } + + m_shell_interpreter = option_arg.str(); + break; + } default: llvm_unreachable("Unimplemented option"); } @@ -1621,10 +1631,12 @@ class CommandObjectPlatformShell : public CommandObjectRaw { void OptionParsingStarting(ExecutionContext *execution_context) override { m_timeout.reset(); m_use_host_platform = false; + m_shell_interpreter.clear(); } Timeout m_timeout = std::chrono::seconds(10); bool m_use_host_platform; + std::string m_shell_interpreter; }; CommandObjectPlatformShell(CommandInterpreter &interpreter) @@ -1650,7 +1662,6 @@ class CommandObjectPlatformShell : public CommandObjectRaw { const bool is_alias = !raw_command_line.contains("platform"); OptionsWithRaw args(raw_command_line); - const char *expr = args.GetRawPart().c_str(); if (args.HasArgs()) if (!ParseOptions(args.GetArgs(), result)) @@ -1662,6 +1673,8 @@ class CommandObjectPlatformShell : public CommandObjectRaw { return false; } + llvm::StringRef cmd = args.GetRawPart(); + PlatformSP platform_sp( m_options.m_use_host_platform ? 
Platform::GetHostPlatform() @@ -1672,7 +1685,8 @@ class CommandObjectPlatformShell : public CommandObjectRaw { std::string output; int status = -1; int signo = -1; - error = (platform_sp->RunShellCommand(expr, working_dir, &status, &signo, + error = (platform_sp->RunShellCommand(m_options.m_shell_interpreter, cmd, + working_dir, &status, &signo, &output, m_options.m_timeout)); if (!output.empty()) result.GetOutputStream().PutCString(output); diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index fbb64957f48d3..4bfaf18ec3025 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -631,6 +631,8 @@ let Command = "platform shell" in { Desc<"Run the commands on the host shell when enabled.">; def platform_shell_timeout : Option<"timeout", "t">, Arg<"Value">, Desc<"Seconds to wait for the remote host to finish running the command.">; + def platform_shell_interpreter : Option<"shell", "s">, Arg<"Path">, + Desc<"Shell interpreter path. 
This is the binary used to run the command.">; } let Command = "process attach" in { diff --git a/lldb/source/Host/common/Host.cpp b/lldb/source/Host/common/Host.cpp index 71c2983ab00f3..958fca07850bf 100644 --- a/lldb/source/Host/common/Host.cpp +++ b/lldb/source/Host/common/Host.cpp @@ -467,14 +467,24 @@ MonitorShellCommand(std::shared_ptr shell_info, lldb::pid_t pid, return true; } -Status Host::RunShellCommand(const char *command, const FileSpec &working_dir, - int *status_ptr, int *signo_ptr, - std::string *command_output_ptr, +Status Host::RunShellCommand(llvm::StringRef command, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output_ptr, + const Timeout &timeout, + bool run_in_shell, bool hide_stderr) { + return RunShellCommand(llvm::StringRef(), Args(command), working_dir, + status_ptr, signo_ptr, command_output_ptr, timeout, + run_in_shell, hide_stderr); +} + +Status Host::RunShellCommand(llvm::StringRef shell_path, + llvm::StringRef command, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output_ptr, const Timeout &timeout, - bool run_in_default_shell, - bool hide_stderr) { - return RunShellCommand(Args(command), working_dir, status_ptr, signo_ptr, - command_output_ptr, timeout, run_in_default_shell, + bool run_in_shell, bool hide_stderr) { + return RunShellCommand(shell_path, Args(command), working_dir, status_ptr, + signo_ptr, command_output_ptr, timeout, run_in_shell, hide_stderr); } @@ -482,14 +492,27 @@ Status Host::RunShellCommand(const Args &args, const FileSpec &working_dir, int *status_ptr, int *signo_ptr, std::string *command_output_ptr, const Timeout &timeout, - bool run_in_default_shell, - bool hide_stderr) { + bool run_in_shell, bool hide_stderr) { + return RunShellCommand(llvm::StringRef(), args, working_dir, status_ptr, + signo_ptr, command_output_ptr, timeout, run_in_shell, + hide_stderr); +} + +Status Host::RunShellCommand(llvm::StringRef shell_path, const Args 
&args, + const FileSpec &working_dir, int *status_ptr, + int *signo_ptr, std::string *command_output_ptr, + const Timeout &timeout, + bool run_in_shell, bool hide_stderr) { Status error; ProcessLaunchInfo launch_info; launch_info.SetArchitecture(HostInfo::GetArchitecture()); - if (run_in_default_shell) { + if (run_in_shell) { // Run the command in a shell - launch_info.SetShell(HostInfo::GetDefaultShell()); + FileSpec shell = HostInfo::GetDefaultShell(); + if (!shell_path.empty()) + shell.SetPath(shell_path); + + launch_info.SetShell(shell); launch_info.GetArguments().AppendArguments(args); const bool localhost = true; const bool will_debug = false; diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm index faac6f59190af..8cd3b35919936 100644 --- a/lldb/source/Host/macosx/objcxx/Host.mm +++ b/lldb/source/Host/macosx/objcxx/Host.mm @@ -1323,11 +1323,11 @@ static bool ShouldLaunchUsingXPC(ProcessLaunchInfo &launch_info) { launch_info.SetWorkingDirectory(working_dir); } } - bool run_in_default_shell = true; + bool run_in_shell = true; bool hide_stderr = true; - Status e = RunShellCommand(expand_command, cwd, &status, nullptr, &output, - std::chrono::seconds(10), run_in_default_shell, - hide_stderr); + Status e = + RunShellCommand(expand_command, cwd, &status, nullptr, &output, + std::chrono::seconds(10), run_in_shell, hide_stderr); if (e.Fail()) return e; diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp index e1eb15c3e8c92..0e0b61f1534f7 100644 --- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp +++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp @@ -711,7 +711,7 @@ bool PlatformRemoteGDBServer::GetFileExists(const FileSpec &file_spec) { } Status PlatformRemoteGDBServer::RunShellCommand( - const char *command, // Shouldn't be NULL + llvm::StringRef shell, llvm::StringRef command, const 
FileSpec & working_dir, // Pass empty FileSpec to use the current working directory int *status_ptr, // Pass NULL if you don't want the process exit status diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h index 3562b2bb09dfc..297b482eb87ad 100644 --- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h +++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h @@ -140,7 +140,7 @@ class PlatformRemoteGDBServer : public Platform, private UserIDResolver { Status Unlink(const FileSpec &path) override; Status RunShellCommand( - const char *command, // Shouldn't be NULL + llvm::StringRef shell, llvm::StringRef command, const FileSpec &working_dir, // Pass empty FileSpec to use the current // working directory int *status_ptr, // Pass NULL if you don't want the process exit status diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 0949b99185234..dd0f69841aa70 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -2812,7 +2812,7 @@ lldb::addr_t GDBRemoteCommunicationClient::GetShlibInfoAddr() { } lldb_private::Status GDBRemoteCommunicationClient::RunShellCommand( - const char *command, // Shouldn't be NULL + llvm::StringRef command, const FileSpec & working_dir, // Pass empty FileSpec to use the current working directory int *status_ptr, // Pass NULL if you don't want the process exit status @@ -2823,7 +2823,7 @@ lldb_private::Status GDBRemoteCommunicationClient::RunShellCommand( const Timeout &timeout) { lldb_private::StreamString stream; stream.PutCString("qPlatform_shell:"); - stream.PutBytesAsRawHex8(command, strlen(command)); + stream.PutBytesAsRawHex8(command.data(), command.size()); stream.PutChar(','); uint32_t 
timeout_sec = UINT32_MAX; if (timeout) { diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h index 0159125a433b8..61acfad5d3136 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h @@ -399,7 +399,7 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase { bool GetFileExists(const FileSpec &file_spec); Status RunShellCommand( - const char *command, // Shouldn't be nullptr + llvm::StringRef command, const FileSpec &working_dir, // Pass empty FileSpec to use the current // working directory int *status_ptr, // Pass nullptr if you don't want the process exit status diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index e867b8db47232..7416ea6dd40c1 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -1319,7 +1319,23 @@ MmapArgList Platform::GetMmapArgumentList(const ArchSpec &arch, addr_t addr, } lldb_private::Status Platform::RunShellCommand( - const char *command, // Shouldn't be nullptr + llvm::StringRef command, + const FileSpec & + working_dir, // Pass empty FileSpec to use the current working directory + int *status_ptr, // Pass nullptr if you don't want the process exit status + int *signo_ptr, // Pass nullptr if you don't want the signal that caused the + // process to exit + std::string + *command_output, // Pass nullptr if you don't want the command output + const Timeout &timeout) { + return RunShellCommand(llvm::StringRef(), command, working_dir, status_ptr, + signo_ptr, command_output, timeout); +} + +lldb_private::Status Platform::RunShellCommand( + llvm::StringRef shell, // Pass empty if you want to use the default + // shell interpreter + llvm::StringRef command, // Shouldn't be empty const FileSpec & working_dir, // Pass empty FileSpec to use the current working directory 
int *status_ptr, // Pass nullptr if you don't want the process exit status @@ -1329,8 +1345,8 @@ lldb_private::Status Platform::RunShellCommand( *command_output, // Pass nullptr if you don't want the command output const Timeout &timeout) { if (IsHost()) - return Host::RunShellCommand(command, working_dir, status_ptr, signo_ptr, - command_output, timeout); + return Host::RunShellCommand(shell, command, working_dir, status_ptr, + signo_ptr, command_output, timeout); else return Status("unimplemented"); } diff --git a/lldb/source/Target/RemoteAwarePlatform.cpp b/lldb/source/Target/RemoteAwarePlatform.cpp index f53158b06b8f9..3a186adca04c7 100644 --- a/lldb/source/Target/RemoteAwarePlatform.cpp +++ b/lldb/source/Target/RemoteAwarePlatform.cpp @@ -171,15 +171,24 @@ Status RemoteAwarePlatform::ResolveExecutable( } Status RemoteAwarePlatform::RunShellCommand( - const char *command, const FileSpec &working_dir, int *status_ptr, + llvm::StringRef command, const FileSpec &working_dir, int *status_ptr, int *signo_ptr, std::string *command_output, const Timeout &timeout) { + return RunShellCommand(llvm::StringRef(), command, working_dir, status_ptr, + signo_ptr, command_output, timeout); +} + +Status RemoteAwarePlatform::RunShellCommand( + llvm::StringRef shell, llvm::StringRef command, const FileSpec &working_dir, + int *status_ptr, int *signo_ptr, std::string *command_output, + const Timeout &timeout) { if (IsHost()) - return Host::RunShellCommand(command, working_dir, status_ptr, signo_ptr, - command_output, timeout); + return Host::RunShellCommand(shell, command, working_dir, status_ptr, + signo_ptr, command_output, timeout); if (m_remote_platform_sp) - return m_remote_platform_sp->RunShellCommand( - command, working_dir, status_ptr, signo_ptr, command_output, timeout); + return m_remote_platform_sp->RunShellCommand(shell, command, working_dir, + status_ptr, signo_ptr, + command_output, timeout); return Status("unable to run a remote command without a platform"); } diff 
--git a/lldb/test/API/commands/platform/basic/Makefile b/lldb/test/API/commands/platform/basic/Makefile new file mode 100644 index 0000000000000..3626466f607c1 --- /dev/null +++ b/lldb/test/API/commands/platform/basic/Makefile @@ -0,0 +1,5 @@ +C_SOURCES := myshell.c +CFLAGS_EXTRAS := -g0 # No debug info. +MAKE_DSYM := NO + +include Makefile.rules diff --git a/lldb/test/API/commands/platform/basic/TestPlatformCommand.py b/lldb/test/API/commands/platform/basic/TestPlatformCommand.py index 570f9b3f828db..dc1701258246a 100644 --- a/lldb/test/API/commands/platform/basic/TestPlatformCommand.py +++ b/lldb/test/API/commands/platform/basic/TestPlatformCommand.py @@ -13,6 +13,7 @@ class PlatformCommandTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) + NO_DEBUG_INFO_TESTCASE = True @no_debug_info_test def test_help_platform(self): @@ -92,3 +93,11 @@ def test_shell_timeout(self): "error: timed out waiting for shell command to complete"]) self.expect("shell -t 1 -- sleep 3", error=True, substrs=[ "error: timed out waiting for shell command to complete"]) + + @no_debug_info_test + def test_host_shell_interpreter(self): + """ Test the host platform shell with a different interpreter """ + self.build() + exe = self.getBuildArtifact('a.out') + self.expect("platform shell -h -s " + exe + " -- 'echo $0'", + substrs=['SUCCESS', 'a.out']) diff --git a/lldb/test/API/commands/platform/basic/TestPlatformPython.py b/lldb/test/API/commands/platform/basic/TestPlatformPython.py index ab10d30b6ff57..0063621e58007 100644 --- a/lldb/test/API/commands/platform/basic/TestPlatformPython.py +++ b/lldb/test/API/commands/platform/basic/TestPlatformPython.py @@ -79,3 +79,20 @@ def test_available_platform_list(self): self.assertEqual( desc_data.GetType(), lldb.eStructuredDataTypeString, 'Platform description is a string') + + @add_test_categories(['pyapi']) + @no_debug_info_test + def test_shell_interpreter(self): + """ Test a shell with a custom interpreter """ + platform = 
self.dbg.GetSelectedPlatform() + self.assertTrue(platform.IsValid()) + + sh_cmd = lldb.SBPlatformShellCommand('/bin/zsh', 'echo $0') + self.assertIn('/bin/zsh', sh_cmd.GetShell()) + self.assertIn('echo $0', sh_cmd.GetCommand()) + + self.build() + sh_cmd.SetShell(self.getBuildArtifact('a.out')) + err = platform.Run(sh_cmd) + self.assertTrue(err.Success()) + self.assertIn("SUCCESS", sh_cmd.GetOutput()) diff --git a/lldb/test/API/commands/platform/basic/myshell.c b/lldb/test/API/commands/platform/basic/myshell.c new file mode 100644 index 0000000000000..d1c0eecb943e9 --- /dev/null +++ b/lldb/test/API/commands/platform/basic/myshell.c @@ -0,0 +1,24 @@ +#include +#include +#include + +int main(int argc, char *argv[]) { + if (argc < 3) { + fprintf(stderr, "ERROR: Too few arguments (count: %d).\n", argc - 1); + exit(1); + } + +#ifdef WIN32 + char *cmd_opt = "/C"; +#else + char *cmd_opt = "-c"; +#endif + + if (strncmp(argv[1], cmd_opt, 2)) { + fprintf(stderr, "ERROR: Missing shell command option ('%s').\n", cmd_opt); + exit(1); + } + + printf("SUCCESS: %s\n", argv[0]); + return 0; +} From b6b63684b19813eda9d1b81a113304e7735f0d5c Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Wed, 2 Sep 2020 23:09:48 +0800 Subject: [PATCH 010/465] [NFC] [PowerPC] Add FMA flag propagation test --- llvm/test/CodeGen/PowerPC/fma-combine.ll | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll index 6683d925a1b16..bf2abe0b6b837 100644 --- a/llvm/test/CodeGen/PowerPC/fma-combine.ll +++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll @@ -239,4 +239,26 @@ define double @getNegatedExpression_crash(double %x, double %y) { %fma1 = call reassoc nsz double @llvm.fma.f64(double %fma, double %y, double %add) ret double %fma1 } + +define double @fma_flag_propagation(double %a) { +; CHECK-FAST-LABEL: fma_flag_propagation: +; CHECK-FAST: # %bb.0: # %entry +; CHECK-FAST-NEXT: xssubdp 1, 1, 1 +; 
CHECK-FAST-NEXT: blr +; +; CHECK-FAST-NOVSX-LABEL: fma_flag_propagation: +; CHECK-FAST-NOVSX: # %bb.0: # %entry +; CHECK-FAST-NOVSX-NEXT: fsub 1, 1, 1 +; CHECK-FAST-NOVSX-NEXT: blr +; +; CHECK-LABEL: fma_flag_propagation: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xssubdp 1, 1, 1 +; CHECK-NEXT: blr +entry: + %0 = fneg double %a + %1 = call reassoc nnan double @llvm.fma.f64(double %0, double 1.0, double %a) + ret double %1 +} + declare double @llvm.fma.f64(double, double, double) nounwind readnone From ec489ae048fd971b22400c61458a5295eeba368a Mon Sep 17 00:00:00 2001 From: Congzhe Cao Date: Wed, 2 Sep 2020 11:02:58 -0400 Subject: [PATCH 011/465] [IPSCCP] Fix a bug that the "returned" attribute is not cleared when function is optimized to return undef In IPSCCP when a function is optimized to return undef, it should clear the returned attribute for all its input arguments and its corresponding call sites. The bug is exposed when the value of an input argument of the function is assigned to a physical register and because of the argument having a returned attribute, the value of this physical register will continue to be used as the function return value right after the call instruction returns, even if the value that this register holds may be clobbered during the function call. This potentially results in incorrect values being used afterwards. 
Reviewed By: jdoerfert, fhahn Differential Revision: https://reviews.llvm.org/D84220 --- llvm/lib/Transforms/Scalar/SCCP.cpp | 19 ++++++ .../Transforms/SCCP/ipsccp-clear-returned.ll | 62 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 llvm/test/Transforms/SCCP/ipsccp-clear-returned.ll diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 57befc9c3cfb3..2afc778ed8214 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -2112,9 +2113,27 @@ bool llvm::runIPSCCP( } // Zap all returns which we've identified as zap to change. + SmallSetVector FuncZappedReturn; for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) { Function *F = ReturnsToZap[i]->getParent()->getParent(); ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); + // Record all functions that are zapped. + FuncZappedReturn.insert(F); + } + + // Remove the returned attribute for zapped functions and the + // corresponding call sites. + for (Function *F : FuncZappedReturn) { + for (Argument &A : F->args()) + F->removeParamAttr(A.getArgNo(), Attribute::Returned); + for (Use &U : F->uses()) { + // Skip over blockaddr users. 
+ if (isa(U.getUser())) + continue; + CallBase *CB = cast(U.getUser()); + for (Use &Arg : CB->args()) + CB->removeParamAttr(CB->getArgOperandNo(&Arg), Attribute::Returned); + } } // If we inferred constant or undef values for globals variables, we can diff --git a/llvm/test/Transforms/SCCP/ipsccp-clear-returned.ll b/llvm/test/Transforms/SCCP/ipsccp-clear-returned.ll new file mode 100644 index 0000000000000..d8b5fbff4e628 --- /dev/null +++ b/llvm/test/Transforms/SCCP/ipsccp-clear-returned.ll @@ -0,0 +1,62 @@ +; if IPSCCP determines a function returns undef, +; then the "returned" attribute of input arguments +; should be cleared. + +; RUN: opt < %s -ipsccp -S | FileCheck %s +define i32 @main() { +; CHECK-LABEL: @main +entry: +; CHECK-NEXT: entry: + %call = call i32 @func_return_undef(i32 returned 1) +; CHECK: call i32 @func_return_undef(i32 1) +; CHECK-NOT: returned + ret i32 %call +; CHECK: ret i32 1 +} + +define internal i32 @func_return_undef(i32 returned %arg) { +; CHECK: {{define.*@func_return_undef}} +; CHECK-NOT: returned +entry: +; CHECK-NEXT: entry: +; CHECK-NEXT: {{ret.*undef}} + ret i32 %arg +} + + +; The only case that users of zapped functions are non-call site +; users is that they are blockaddr users. 
Skip them because we +; want to remove the returned attribute for call sites + +; CHECK: {{define.*@blockaddr_user}} +; CHECK-NOT: returned +define internal i32 @blockaddr_user(i1 %c, i32 returned %d) { +entry: + br i1 %c, label %bb1, label %bb2 + +bb1: + br label %branch.block + +bb2: + br label %branch.block + +branch.block: + %addr = phi i8* [blockaddress(@blockaddr_user, %target1), %bb1], [blockaddress(@blockaddr_user, %target2), %bb2] + indirectbr i8* %addr, [label %target1, label %target2] + +target1: + br label %target2 + +; CHECK: ret i32 undef +target2: + ret i32 %d +} + +define i32 @call_blockaddr_user(i1 %c) { +; CHECK-LABEL: define i32 @call_blockaddr_user( +; CHECK-NEXT: %r = call i32 @blockaddr_user(i1 %c +; CHECK-NOT: returned +; CHECK-NEXT: ret i32 10 + %r = call i32 @blockaddr_user(i1 %c, i32 returned 10) + ret i32 %r +} From c5aa63dd560b9cf5825c1e4da2a9ee53dbd772f3 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Wed, 2 Sep 2020 08:24:06 -0700 Subject: [PATCH 012/465] [lldb/Host] Add missing proc states The /proc//status parsing is missing a few cases: - Idle - Parked - Dead If we encounter an unknown proc state, this leads to an msan warning. In reality, we only check that the state != Zombie, so it doesn't really matter that we handle all cases, but handle them anyway (current list: [1]). Also explicitly set it to unknown if we encounter an unknown state. There will still be an msan warning if the proc entry has no `State:` line, but that should not happen. Use a StringSwitch to make the handling of proc states a little more compact. 
[1] https://github.com/torvalds/linux/blob/master/fs/proc/array.c Reviewed By: labath Differential Revision: https://reviews.llvm.org/D86818 --- lldb/source/Host/linux/Host.cpp | 41 ++++++++++++++++----------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/lldb/source/Host/linux/Host.cpp b/lldb/source/Host/linux/Host.cpp index 45973f5d214b2..520a00df35f6d 100644 --- a/lldb/source/Host/linux/Host.cpp +++ b/lldb/source/Host/linux/Host.cpp @@ -16,6 +16,7 @@ #include #include +#include "llvm/ADT/StringSwitch.h" #include "llvm/Object/ELF.h" #include "llvm/Support/ScopedPrinter.h" @@ -35,8 +36,11 @@ using namespace lldb_private; namespace { enum class ProcessState { Unknown, + Dead, DiskSleep, + Idle, Paging, + Parked, Running, Sleeping, TracedOrStopped, @@ -50,12 +54,14 @@ class ProcessLaunchInfo; static bool GetStatusInfo(::pid_t Pid, ProcessInstanceInfo &ProcessInfo, ProcessState &State, ::pid_t &TracerPid) { + Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_HOST); + auto BufferOrError = getProcFile(Pid, "status"); if (!BufferOrError) return false; llvm::StringRef Rest = BufferOrError.get()->getBuffer(); - while(!Rest.empty()) { + while (!Rest.empty()) { llvm::StringRef Line; std::tie(Line, Rest) = Rest.split('\n'); @@ -84,26 +90,19 @@ static bool GetStatusInfo(::pid_t Pid, ProcessInstanceInfo &ProcessInfo, Line.ltrim().consumeInteger(10, PPid); ProcessInfo.SetParentProcessID(PPid); } else if (Line.consume_front("State:")) { - char S = Line.ltrim().front(); - switch (S) { - case 'R': - State = ProcessState::Running; - break; - case 'S': - State = ProcessState::Sleeping; - break; - case 'D': - State = ProcessState::DiskSleep; - break; - case 'Z': - State = ProcessState::Zombie; - break; - case 'T': - State = ProcessState::TracedOrStopped; - break; - case 'W': - State = ProcessState::Paging; - break; + State = llvm::StringSwitch(Line.ltrim().take_front(1)) + .Case("D", ProcessState::DiskSleep) + .Case("I", ProcessState::Idle) + .Case("R", 
ProcessState::Running) + .Case("S", ProcessState::Sleeping) + .CaseLower("T", ProcessState::TracedOrStopped) + .Case("W", ProcessState::Paging) + .Case("P", ProcessState::Parked) + .Case("X", ProcessState::Dead) + .Case("Z", ProcessState::Zombie) + .Default(ProcessState::Unknown); + if (State == ProcessState::Unknown) { + LLDB_LOG(log, "Unknown process state {0}", Line); } } else if (Line.consume_front("TracerPid:")) { Line = Line.ltrim(); From 39cf83cc78ff0a017fb9de27d45b87217aa6f558 Mon Sep 17 00:00:00 2001 From: Ehsan Toosi Date: Fri, 31 Jul 2020 15:20:37 +0200 Subject: [PATCH 013/465] [mlir] Extend BufferAssignmentTypeConverter with result conversion callbacks In this PR, the users of BufferPlacement can configure BufferAssginmentTypeConverter. These new configurations would give the user more freedom in the process of converting function signature, and return and call operation conversions. These are the new features: - Accepting callback functions for decomposing types (i.e. 1 to N type conversion such as unpacking tuple types). - Defining ResultConversionKind for specifying whether a function result with a certain type should be appended to the function arguments list or should be kept as function result. (Usage: converter.setResultConversionKind(AppendToArgumentList)) - Accepting callback functions for composing or decomposing values (i.e. N to 1 and 1 to N value conversion). 
Differential Revision: https://reviews.llvm.org/D85133 --- .../include/mlir/Transforms/BufferPlacement.h | 344 +++++++++--------- .../Linalg/Transforms/TensorsToBuffers.cpp | 11 +- mlir/lib/Transforms/BufferPlacement.cpp | 220 ++++++++++- ...nt-preparation-allowed-memref-results.mlir | 66 ++++ .../buffer-placement-preparation.mlir | 85 +++++ mlir/test/lib/Dialect/Test/TestOps.td | 29 +- .../lib/Transforms/TestBufferPlacement.cpp | 48 ++- 7 files changed, 612 insertions(+), 191 deletions(-) diff --git a/mlir/include/mlir/Transforms/BufferPlacement.h b/mlir/include/mlir/Transforms/BufferPlacement.h index f8559a9dd9396..b3db7794fd971 100644 --- a/mlir/include/mlir/Transforms/BufferPlacement.h +++ b/mlir/include/mlir/Transforms/BufferPlacement.h @@ -52,6 +52,111 @@ class BufferAssignmentPlacer { Operation *operation; }; +/// A helper type converter class for using inside Buffer Assignment operation +/// conversion patterns. The default constructor keeps all the types intact +/// except for the ranked-tensor types which is converted to memref types. +class BufferAssignmentTypeConverter : public TypeConverter { +public: + /// This enum is for showing how buffer placement operation converters should + /// conduct with certain result type after type conversion. This value can be + /// set/get for each specific type using setResultConversionKind or + /// getResultConversionKind. + enum ResultConversionKind { AppendToArgumentsList, KeepAsFunctionResult }; + + BufferAssignmentTypeConverter(); + + /// This method tries to decompose a value of a certain type using provided + /// decompose callback functions. If it is unable to do so, the original value + /// is returned. + void tryDecomposeValue(OpBuilder &, Location, Type, Value, + SmallVectorImpl &); + + /// This method tries to decompose a type using provided decompose callback + /// functions. If it is unable to do so, the original type is returned. 
+ void tryDecomposeType(Type, SmallVectorImpl &); + + /// This method registers a callback function that will be called to decompose + /// a value of a certain type into several values. + template ::template arg_t<2>> + void addDecomposeValueConversion(FnT &&callback) { + decomposeValueConversions.emplace_back( + wrapDecomposeValueConversionCallback(std::forward(callback))); + } + + /// This method registers a callback function that will be called to decompose + /// a type into several types. + template ::template arg_t<0>> + void addDecomposeTypeConversion(FnT &&callback) { + auto wrapper = + wrapDecomposeTypeConversionCallback(std::forward(callback)); + decomposeTypeConversions.emplace_back(wrapper); + addConversion(std::forward(callback)); + } + + /// This method returns ResultConversionKind for the mapping from `origin` + /// type to `input` type. + ResultConversionKind getResultConversionKind(Type origin, Type input); + + /// This method registers ResultConversionKind for the mapping from type 'T' + /// to type 'U'. + template + void setResultConversionKind(ResultConversionKind kind) { + assert((kind != AppendToArgumentsList || + llvm::is_one_of::value) && + "Only the memref typed values can be set to be appended to the " + "function argument list at the moment"); + resultTypeConversions.emplace_back( + [=](Type origin, Type input) -> Optional { + if (origin.template isa() && input.template isa()) + return kind; + return llvm::None; + }); + } + +private: + using DecomposeValueConversionCallFn = std::function( + OpBuilder &, Location, Type, Value, SmallVectorImpl &)>; + + using DecomposeTypeConversionCallFn = + std::function(Type, SmallVectorImpl &)>; + + using ResultConversionKindFn = + std::function(Type, Type)>; + + /// Generate a wrapper for the given decompose value conversion callback. 
+ template + DecomposeValueConversionCallFn + wrapDecomposeValueConversionCallback(FnT &&callback) { + return [callback = std::forward(callback)]( + OpBuilder &builder, Location loc, Type type, Value value, + SmallVectorImpl &newValues) -> Optional { + if (T derivedType = type.dyn_cast()) + return callback(builder, loc, derivedType, value, newValues); + return llvm::None; + }; + } + + /// Generate a wrapper for the given decompose type conversion callback. + template + DecomposeTypeConversionCallFn + wrapDecomposeTypeConversionCallback(FnT &&callback) { + return [callback = std::forward(callback)]( + Type type, + SmallVectorImpl &results) -> Optional { + T derivedType = type.dyn_cast(); + if (!derivedType) + return llvm::None; + return callback(derivedType, results); + }; + } + + SmallVector resultTypeConversions; + SmallVector decomposeValueConversions; + SmallVector decomposeTypeConversions; +}; + /// Helper conversion pattern that encapsulates a BufferAssignmentPlacer /// instance. Sample usage: /// class CustomConversionPattern : public @@ -68,43 +173,22 @@ class BufferAssignmentOpConversionPattern public: explicit BufferAssignmentOpConversionPattern( MLIRContext *context, BufferAssignmentPlacer *bufferAssignment = nullptr, - TypeConverter *converter = nullptr, PatternBenefit benefit = 1) + BufferAssignmentTypeConverter *converter = nullptr, + PatternBenefit benefit = 1) : OpConversionPattern(context, benefit), - bufferAssignment(bufferAssignment), converter(converter) {} + bufferAssignment(bufferAssignment), converter(converter) { + assert(converter && "The type converter has not been defined"); + } protected: BufferAssignmentPlacer *bufferAssignment; - TypeConverter *converter; -}; - -/// A helper type converter class for using inside Buffer Assignment operation -/// conversion patterns. The default constructor keeps all the types intact -/// except for the ranked-tensor types which is converted to memref types. 
-class BufferAssignmentTypeConverter : public TypeConverter { -public: - BufferAssignmentTypeConverter(); - - /// A helper function to check if `type` has been converted from non-memref - /// type to memref. - static bool isConvertedMemref(Type type, Type before); + BufferAssignmentTypeConverter *converter; }; -namespace detail { - -/// Converts the signature of the function based on whether the function is -/// allowed to return memref typed results or not using -/// `allowMemrefFunctionResults` parameter. If this option is false, then it -/// adds an extra function argument as an output buffer for each function result -/// which is going to be a memref type only after type conversion. The -/// other function result types remain unchanged. If -/// `allowMemrefFunctionResults` is true, the types are converted in place. -/// Any changes in function signature need to be applied -/// to return and caller operations. `BufferAssignmentReturnOpConverter` and -/// `BufferAssignmentCallOpConverter` are two helper function that match the -/// return and caller operation with the new function signature. Furthermore, -/// `BufferAssignmentTypeConverter` is a helper `TypeConverter` for converting -/// tensor typed values to memref typed ones. -template +/// Converts the signature of the function using BufferAssignmentTypeConverter. +/// Each result type of the function is kept as a function result or appended to +/// the function arguments list based on ResultConversionKind for the converted +/// result type. class BufferAssignmentFuncOpConverter : public BufferAssignmentOpConversionPattern { public: @@ -112,58 +196,16 @@ class BufferAssignmentFuncOpConverter FuncOp>::BufferAssignmentOpConversionPattern; /// Performs the actual signature rewriting step. 
- LogicalResult - matchAndRewrite(mlir::FuncOp funcOp, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final { - if (!converter) - return funcOp.emitError("The type converter has not been defined for " - "BufferAssignmentFuncOpConverter"); - auto funcType = funcOp.getType(); - - // Convert function arguments using the provided TypeConverter. - TypeConverter::SignatureConversion conversion(funcType.getNumInputs()); - for (auto argType : llvm::enumerate(funcType.getInputs())) - conversion.addInputs(argType.index(), - converter->convertType(argType.value())); - - // If allowMemrefFunctionResults is false and a function result type is not - // a memref but it would be a memref after type conversion, a new argument - // should be appended to the function arguments list for this result. - // Otherwise, it remains unchanged as a function result. - SmallVector newResultTypes; - newResultTypes.reserve(funcOp.getNumResults()); - for (Type resType : funcType.getResults()) { - Type convertedType = converter->convertType(resType); - if (!allowMemrefFunctionResults && - BufferAssignmentTypeConverter::isConvertedMemref(convertedType, - resType)) - conversion.addInputs(convertedType); - else - newResultTypes.push_back(convertedType); - } - if (failed(rewriter.convertRegionTypes(&funcOp.getBody(), *converter, - &conversion))) - return failure(); - - // Update the signature of the function. - rewriter.updateRootInPlace(funcOp, [&] { - funcOp.setType(rewriter.getFunctionType(conversion.getConvertedTypes(), - newResultTypes)); - }); - return success(); - } + LogicalResult matchAndRewrite(mlir::FuncOp, ArrayRef, + ConversionPatternRewriter &) const; }; /// Rewrites the `ReturnOp` to conform with the changed function signature. -/// if allowMemrefFunctionResults is false, operands that correspond to return -/// values and have been rewritten from illegal typed results to memref -/// arguments are dropped. 
In their place, a corresponding copy operation from -/// the operand to the output function argument is inserted. Otherwise, the -/// memref typed operands are returned. -/// Note: If this pattern rewriter is used with BufferAssignmentFuncOpConverter, -/// allowMemrefFunctionResults must be set/unset for both. +/// Operands that correspond to return values and their types have been set to +/// AppendToArgumentsList are dropped. In their place, a corresponding copy +/// operation from the operand to the target function argument is inserted. template + typename CopyOpTy> class BufferAssignmentReturnOpConverter : public BufferAssignmentOpConversionPattern { public: @@ -174,44 +216,48 @@ class BufferAssignmentReturnOpConverter LogicalResult matchAndRewrite(ReturnOpSourceTy returnOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { - // If the memref typed results can be returned as function results, the new - // `ReturnOp` should only return the type converted operands. - if (allowMemrefFunctionResults) { - rewriter.replaceOpWithNewOp(returnOp, operands); - return success(); + Location loc = returnOp.getLoc(); + + // Split the operands depending on whether they need a copy operation or + // they remain as operands of the return operation. If an operand is + // decomposable and a decompose callback function has been provided by the + // user, it will be unpacked. 
+ SmallVector newOperands, needCopyOperands; + OpBuilder builder(returnOp); + for (auto operand : llvm::enumerate(operands)) { + SmallVector values; + this->converter->tryDecomposeValue( + builder, loc, operand.value().getType(), operand.value(), values); + Type type = returnOp.getOperand(operand.index()).getType(); + SmallVector originTypes; + this->converter->tryDecomposeType(type, originTypes); + for (auto value : llvm::enumerate(values)) { + Type origin = originTypes[value.index()]; + Type converted = value.value().getType(); + auto kind = this->converter->getResultConversionKind(origin, converted); + if (kind == BufferAssignmentTypeConverter::KeepAsFunctionResult) + newOperands.push_back(value.value()); + else + // kind = BufferAssignmentTypeConverter::AppendToArgumentsList + needCopyOperands.push_back(value.value()); + } } - // Split the operands by their kinds whether they are converted memref or - // not. - SmallVector needCopyOperands, newOperands; - unsigned operandsSize = operands.size(); - needCopyOperands.reserve(operandsSize); - newOperands.reserve(operandsSize); - for (auto operand : llvm::enumerate(operands)) - if (BufferAssignmentTypeConverter::isConvertedMemref( - operand.value().getType(), - returnOp.getOperand(operand.index()).getType())) - needCopyOperands.push_back(operand.value()); - else - newOperands.push_back(operand.value()); - + // Insert Copy operations instead for the operands that have been removed + // from operand list and appended to the function arguments list. Block &entryBlock = returnOp.getParentRegion()->front(); unsigned numFuncArgs = entryBlock.getNumArguments(); - - // Find the index of the first destination buffer. 
- assert(needCopyOperands.size() <= numFuncArgs && - "The number of operands of return operation is more than the " - "number of function arguments."); + if (needCopyOperands.size() > numFuncArgs) + return returnOp.emitError( + "The number of operands that need Copy operations is more " + "than the number of target function arguments."); unsigned destArgNum = numFuncArgs - needCopyOperands.size(); rewriter.setInsertionPoint(returnOp); for (Value operand : needCopyOperands) { - // Insert a `CopyOp` for each converted memref-type operand. - rewriter.create(returnOp.getLoc(), operand, + rewriter.create(loc, operand, entryBlock.getArgument(destArgNum)); ++destArgNum; } - - // Insert the new target Return operation. rewriter.replaceOpWithNewOp(returnOp, newOperands); return success(); } @@ -219,94 +265,32 @@ class BufferAssignmentReturnOpConverter /// Rewrites the `CallOp` to match its operands and results with the signature /// of the callee after rewriting the callee with -/// BufferAssignmentFuncOpConverter. If allowMemrefFunctionResults is false, a -/// buffer is allocated as an output buffer only for each memref typed result -/// that has been rewritten. The new allocated buffer is passed through the -/// operands list of the new `CallOp`. -/// Note: If this pattern rewriter is used with BufferAssignmentFuncOpConverter, -/// allowMemrefFunctionResults must be set/unset for both. -template +/// BufferAssignmentFuncOpConverter. 
class BufferAssignmentCallOpConverter : public BufferAssignmentOpConversionPattern { public: using BufferAssignmentOpConversionPattern< CallOp>::BufferAssignmentOpConversionPattern; - LogicalResult - matchAndRewrite(CallOp callOp, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final { - if (!converter) - return callOp.emitError("The type converter has not been defined for " - "BufferAssignmentCallOpConverter"); - Location loc = callOp.getLoc(); - - // If the memref typed results can be returned as function results, there is - // no need to create output buffers. It is only required to convert the type - // of operands and results in place for creating the new `CallOp`. - if (allowMemrefFunctionResults) { - SmallVector resultTypes; - resultTypes.reserve(callOp.getNumResults()); - for (Type type : callOp.getResultTypes()) - resultTypes.push_back(converter->convertType(type)); - rewriter.replaceOpWithNewOp(callOp, callOp.getCallee(), - resultTypes, operands); - return success(); - } - - SmallVector newOperands, replacingValues; - SmallVector newResultTypes; - unsigned numResults = callOp.getNumResults(); - newOperands.reserve(numResults + operands.size()); - newOperands.append(operands.begin(), operands.end()); - newResultTypes.reserve(numResults); - replacingValues.reserve(numResults); - - // For each memref result of `CallOp` which has not been a memref before - // the type conversion, a new buffer is allocated and passed to the operands - // list of the new `CallOp`. Otherwise, it remains as a caller result. 
- for (Value result : callOp.getResults()) { - Type currType = result.getType(); - Type newType = converter->convertType(result.getType()); - if (BufferAssignmentTypeConverter::isConvertedMemref(newType, currType)) { - OpBuilder::InsertionGuard guard(rewriter); - rewriter.restoreInsertionPoint(bufferAssignment->computeAllocPosition( - result.dyn_cast())); - Value alloc = - rewriter.create(loc, newType.dyn_cast()); - newOperands.push_back(alloc); - replacingValues.push_back(alloc); - } else { - newResultTypes.push_back(currType); - - // No replacing is required. - replacingValues.push_back(nullptr); - } - } - - // Creating the new `CallOp`. - rewriter.create(loc, callOp.getCallee(), newResultTypes, - newOperands); - - // Replacing the results of the old `CallOp`. - rewriter.replaceOp(callOp, replacingValues); - return success(); - } + /// Performs the actual rewriting step. + LogicalResult matchAndRewrite(CallOp, ArrayRef, + ConversionPatternRewriter &) const; }; -} // end namespace detail /// Populates `patterns` with the conversion patterns of buffer /// assignment. 
template + typename CopyOpTy> static void populateWithBufferAssignmentOpConversionPatterns( MLIRContext *context, BufferAssignmentPlacer *placer, - TypeConverter *converter, OwningRewritePatternList *patterns) { + BufferAssignmentTypeConverter *converter, + OwningRewritePatternList *patterns) { // clang-format off patterns->insert< - detail::BufferAssignmentCallOpConverter, - detail::BufferAssignmentFuncOpConverter, - detail::BufferAssignmentReturnOpConverter - + BufferAssignmentCallOpConverter, + BufferAssignmentFuncOpConverter, + BufferAssignmentReturnOpConverter + >(context, placer, converter); // clang-format on } diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp index 04c1fbd5d5655..89a01f9ca6292 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp @@ -100,11 +100,11 @@ class GenericOpConverter /// tensors to buffers. static void populateConvertLinalgOnTensorsToBuffersPattern( MLIRContext *context, BufferAssignmentPlacer *placer, - TypeConverter *converter, OwningRewritePatternList *patterns) { + BufferAssignmentTypeConverter *converter, + OwningRewritePatternList *patterns) { populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp, - /*allowMemrefFunctionResults=*/false>(context, placer, converter, - patterns); + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, placer, + converter, patterns); patterns->insert(context, placer, converter); } @@ -141,6 +141,9 @@ struct ConvertLinalgOnTensorsToBuffers converter.isLegal(&funcOp.getBody()); }); + converter.setResultConversionKind( + BufferAssignmentTypeConverter::AppendToArgumentsList); + // Walk over all the functions to apply buffer assignment. 
getOperation().walk([&](FuncOp function) -> WalkResult { OwningRewritePatternList patterns; diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp index 201570a244ffc..1ab3e7e2e48dc 100644 --- a/mlir/lib/Transforms/BufferPlacement.cpp +++ b/mlir/lib/Transforms/BufferPlacement.cpp @@ -713,9 +713,223 @@ BufferAssignmentTypeConverter::BufferAssignmentTypeConverter() { }); } -/// Checks if `type` has been converted from non-memref type to memref. -bool BufferAssignmentTypeConverter::isConvertedMemref(Type type, Type before) { - return type.isa() && !before.isa(); +/// This method tries to decompose a value of a certain type using provided +/// decompose callback functions. If it is unable to do so, the original value +/// is returned. +void BufferAssignmentTypeConverter::tryDecomposeValue( + OpBuilder &builder, Location loc, Type type, Value value, + SmallVectorImpl &results) { + for (auto conversion : decomposeValueConversions) + if (conversion(builder, loc, type, value, results) != llvm::None) + return; + results.push_back(value); +} + +/// This method tries to decompose a type using provided decompose callback +/// functions. If it is unable to do so, the original type is returned. +void BufferAssignmentTypeConverter::tryDecomposeType( + Type type, SmallVectorImpl &types) { + for (auto conversion : decomposeTypeConversions) + if (conversion(type, types) != llvm::None) + return; + types.push_back(type); +} + +/// This method returns ResultConversionKind for the input type. 
+BufferAssignmentTypeConverter::ResultConversionKind +BufferAssignmentTypeConverter::getResultConversionKind(Type origin, + Type converted) { + for (auto conversion : resultTypeConversions) { + auto res = conversion(origin, converted); + if (res != llvm::None) + return res.getValue(); + } + return KeepAsFunctionResult; +} + +//===----------------------------------------------------------------------===// +// BufferAssignmentFuncOpConverter +//===----------------------------------------------------------------------===// + +/// Performs the actual function signature rewriting step. +LogicalResult BufferAssignmentFuncOpConverter::matchAndRewrite( + mlir::FuncOp funcOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + auto funcType = funcOp.getType(); + + // Convert function arguments using the provided TypeConverter. + TypeConverter::SignatureConversion conversion(funcType.getNumInputs()); + for (auto argType : llvm::enumerate(funcType.getInputs())) { + SmallVector decomposedTypes, convertedTypes; + converter->tryDecomposeType(argType.value(), decomposedTypes); + converter->convertTypes(decomposedTypes, convertedTypes); + conversion.addInputs(argType.index(), convertedTypes); + } + + // Convert the result types of the function. 
+ SmallVector newResultTypes; + newResultTypes.reserve(funcOp.getNumResults()); + for (Type resultType : funcType.getResults()) { + SmallVector originTypes; + converter->tryDecomposeType(resultType, originTypes); + for (auto origin : originTypes) { + Type converted = converter->convertType(origin); + auto kind = converter->getResultConversionKind(origin, converted); + if (kind == BufferAssignmentTypeConverter::AppendToArgumentsList) + conversion.addInputs(converted); + else + // kind = BufferAssignmentTypeConverter::KeepAsFunctionResult + newResultTypes.push_back(converted); + } + } + + if (failed(rewriter.convertRegionTypes(&funcOp.getBody(), *converter, + &conversion))) + return failure(); + + // Update the signature of the function. + rewriter.updateRootInPlace(funcOp, [&] { + funcOp.setType(rewriter.getFunctionType(conversion.getConvertedTypes(), + newResultTypes)); + }); + return success(); +} + +//===----------------------------------------------------------------------===// +// BufferAssignmentCallOpConverter +//===----------------------------------------------------------------------===// + +/// Performs the actual rewriting step. +LogicalResult BufferAssignmentCallOpConverter::matchAndRewrite( + CallOp callOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + + // This class represents a mapping from a result to a list of values and some + // results that have not yet been constructed. Instead, the indices of these + // results in the operation that will be constructed are known. They will be + // replaced with the actual values when they are available. The order of + // adding to this mapping is important. + class ResultMapping { + public: + ResultMapping() { order = 0; }; + + /// Add an available value to the mapping. + void addMapping(Value value) { + toValuesMapping.push_back({order++, value}); + } + + /// Add the index of an unavailable result value to the mapping.
+ void addMapping(unsigned index) { + toIndicesMapping.push_back({order++, index}); + } + + /// This method returns the mapping values list. The result values that are + /// known only by their indices are replaced with their actual values. + void getMappingValues(ValueRange valuesToReplaceIndices, + SmallVectorImpl &values) { + // Append available values to the list. + SmallVector, 2> res(toValuesMapping.begin(), + toValuesMapping.end()); + // Replace the indices with the actual values. + llvm::for_each( + toIndicesMapping, [&](const std::pair &entry) { + assert(entry.second < valuesToReplaceIndices.size() && + "The value index is out of range."); + res.push_back({entry.first, valuesToReplaceIndices[entry.second]}); + }); + // Sort the values based on their insertion order. + llvm::sort(res, [](const std::pair &v1, + const std::pair &v2) { + return v1.first < v2.first; + }); + // Fill the values. + llvm::for_each(res, [&](const std::pair &entry) { + values.push_back(entry.second); + }); + } + + private: + /// Keeps the insertion order of the mapping values. + int order; + + /// Contains the mapping values along with their insertion order. + SmallVector, 2> toValuesMapping; + + /// Contains the indices of result values along with their insertion order. + SmallVector, 2> toIndicesMapping; + }; + + Location loc = callOp.getLoc(); + OpBuilder builder(callOp); + SmallVector newOperands; + + // Create the operands list of the new `CallOp`. It unpacks the decomposable + // values if a decompose callback function has been provided by the user. + for (auto operand : operands) { + SmallVector values; + this->converter->tryDecomposeValue(builder, loc, operand.getType(), operand, + values); + newOperands.append(values.begin(), values.end()); + } + + // Create the new result types for the new `CallOp` and a mapping from the old + // result to new value(s).
+ SmallVector newResultTypes; + SmallVector mappings; + mappings.resize(callOp.getNumResults()); + for (auto result : llvm::enumerate(callOp.getResults())) { + SmallVector originTypes; + converter->tryDecomposeType(result.value().getType(), originTypes); + auto &resultMapping = mappings[result.index()]; + for (Type origin : originTypes) { + Type converted = converter->convertType(origin); + auto kind = converter->getResultConversionKind(origin, converted); + if (kind == BufferAssignmentTypeConverter::KeepAsFunctionResult) { + newResultTypes.push_back(converted); + // The result value is not yet available. Its index is kept and it is + // replaced with the actual value of the new `CallOp` later. + resultMapping.addMapping(newResultTypes.size() - 1); + } else { + // kind = BufferAssignmentTypeConverter::AppendToArgumentsList + OpBuilder::InsertionGuard guard(rewriter); + rewriter.restoreInsertionPoint( + bufferAssignment->computeAllocPosition(result.value())); + MemRefType memref = converted.dyn_cast(); + if (!memref) + return callOp.emitError("Cannot allocate for a non-Memref type"); + Value alloc = rewriter.create(loc, memref); + newOperands.push_back(alloc); + resultMapping.addMapping(alloc); + } + } + } + + CallOp newCallOp = rewriter.create(loc, callOp.getCallee(), + newResultTypes, newOperands); + + // Build a replacing value for each result to replace its uses. If a result + // has multiple mapping values, it needs to be packed to a single value. + OpBuilder nextBuilder(callOp.getOperation()->getNextNode()); + SmallVector replacedValues; + replacedValues.reserve(callOp.getNumResults()); + for (unsigned i = 0, e = callOp.getNumResults(); i < e; ++i) { + SmallVector valuesToPack; + mappings[i].getMappingValues(newCallOp.getResults(), valuesToPack); + if (valuesToPack.empty()) { + // No replacement is required. 
+ replacedValues.push_back(nullptr); + } else if (valuesToPack.size() == 1) { + replacedValues.push_back(valuesToPack.front()); + } else { + // Values need to be packed using callback function. The same callback + // that is used for materializeArgumentConversion is used for packing. + Value packed = converter->materializeArgumentConversion( + nextBuilder, loc, callOp.getType(i), valuesToPack); + replacedValues.push_back(packed); + } + } + rewriter.replaceOp(callOp, replacedValues); + return success(); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir index 084ac38af6e32..e1dacdf0184e2 100644 --- a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir @@ -111,7 +111,73 @@ func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> { // CHECK: %[[Y:.*]]:2 = call @callee(%[[X]]#0) // CHECK: return %[[Y]]#0 +// ----- + +// Test case: Testing BufferAssginmnetCallOpConverter to see if it matches with the +// signature of the new signature of the callee function when there are tuple typed +// args and results. BufferAssginmentTypeConverter is set to flatten tuple typed +// arguments. The tuple typed values should be decomposed and composed using +// get_tuple_element and make_tuple operations of test dialect. Tensor types are +// converted to Memref. Memref typed function results remain as function results. 
+// CHECK-LABEL: func @callee +func @callee(%arg0: tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>){ + return %arg0 : tuple,i1, tensor<5xf32>> +} +// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>) +// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) +// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} +// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]] +// CHECK-LABEL: func @caller +func @caller(%arg0: tuple,i1, tensor<5xf32>>) -> tuple,i1, tensor<5xf32>>{ + %x0 = call @callee(%arg0) : (tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>) + %y0 = call @callee(%x0) : (tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>) + return %y0 : tuple,i1, tensor<5xf32>> +} +// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>) +// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) +// CHECK-NEXT: %[[ARG_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 2 : i32} +// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]) +// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>) +// CHECK-NEXT: %[[RESULT_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 0 : i32} +// 
CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 2 : i32} +// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]) +// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>) +// CHECK-NEXT: %[[RETURN_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 2 : i32} +// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]] +// ----- +// Test case: Testing BufferAssignmentFuncOpConverter and +// BufferAssignmentReturnOpConverter to see if the return operation matches with +// the new function signature when there are tuple typed args and results. +// BufferAssignmentTypeConverter is set to flatten tuple typed arguments. The tuple +// typed values should be decomposed and composed using get_tuple_element and +// make_tuple operations of test dialect. Tensor types are converted to Memref. +// Memref typed function results remain as function results.
+ +// CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results +func @decompose_tuple_typed_function_args_and_results(%arg0: tuple, %arg1: tensor<10xf32>, %arg2: tuple>) -> (tuple>, tensor<10xf32>, tuple){ + return %arg2, %arg1, %arg0 : tuple>, tensor<10xf32>, tuple +} +// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32> +// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, i1, f32) +// CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]]) +// CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]]) +// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[SECOND_TUPLE_SECOND_ELEM]], %[[ARG2]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]] diff --git a/mlir/test/Transforms/buffer-placement-preparation.mlir b/mlir/test/Transforms/buffer-placement-preparation.mlir index 064b0fd7e85a9..b1cfdfd690cf6 100644 --- a/mlir/test/Transforms/buffer-placement-preparation.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation.mlir @@ -285,8 +285,93 @@ func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> { // CHECK: linalg.copy(%[[Y0]], %[[CALLER_RESULT]]) // CHECK: return +// ----- + // CHECK-LABEL: func @func_with_unranked_arg func @func_with_unranked_arg(%arg0: tensor<*xf32>) { return } // CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>) + +// ----- + +// Test case: Testing BufferAssginmnetCallOpConverter to see if it matches with the +// signature of the new signature of the callee function when there are tuple 
typed +// args and results. BufferAssginmentTypeConverter is set to flatten tuple typed +// arguments. The tuple typed values should be decomposed and composed using +// get_tuple_element and make_tuple operations of test dialect. Tensor types are +// converted to Memref. Memref typed function results are appended to the function +// arguments list. + +// CHECK-LABEL: func @callee +func @callee(%arg0: tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>){ + return %arg0 : tuple,i1, tensor<5xf32>> +} +// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, %[[RESULT1:.*]]: memref<5xf32>) +// CHECK-SAME: i1 +// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} +// CHECK-NEXT: linalg.copy(%[[FIRST_ELEM]], %[[RESULT0]]) +// CHECK-NEXT: linalg.copy(%[[THIRD_ELEM]], %[[RESULT1]]) +// CHECK-NEXT: return %[[SECOND_ELEM]] + + +// CHECK-LABEL: func @caller +func @caller(%arg0: tuple,i1, tensor<5xf32>>) -> tuple,i1, tensor<5xf32>>{ + %x0 = call @callee(%arg0) : (tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>) + %y0 = call @callee(%x0) : (tuple,i1, tensor<5xf32>>) -> (tuple,i1, tensor<5xf32>>) + return %y0 : tuple,i1, tensor<5xf32>> +} +// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>, %[[RESULT1:.*]]: memref<5xf32>) +// CHECK-SAME: i1 +// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: 
%[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]) +// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1 +// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], %[[CALLEE_RESULT]], %[[SECOND_ALLOC]]) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]) +// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1 +// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]], %[[CALLEE_RESULT]], %[[SECOND_ALLOC]]) +// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: %[[THIRD_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32} +// CHECK-NEXT: linalg.copy(%[[FIRST_ELEM]], %[[RESULT0]]) +// CHECK-NEXT: linalg.copy(%[[THIRD_ELEM]], %[[RESULT1]]) +// CHECK-NEXT: return %[[SECOND_ELEM]] + +// ----- + +// Test case: Testing BufferAssginmnetFuncOpConverter and +// BufferAssginmentReturnOpConverter to see if the return operation matches with +// the new function signature when there are tuple typed args and results. +// BufferAssginmentTypeConverter is set to flatten tuple typed arguments. 
The tuple +// typed values should be decomposed and composed using get_tuple_element and +// make_tuple operations of test dialect. Tensor types are converted to Memref. +// Memref typed function results are appended to the function arguments list. + +// CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results +func @decompose_tuple_typed_function_args_and_results(%arg0: tuple, %arg1: tensor<10xf32>, %arg2: tuple>) -> (tuple>, tensor<10xf32>, tuple){ + return %arg2, %arg1, %arg0 : tuple>, tensor<10xf32>, tuple +} +// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<5xf32>, %[[RESULT1:.*]]: memref<10xf32> +// CHECK-SAME: (i1, i1, f32) +// CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]]) +// CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]]) +// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32} +// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32} +// CHECK-NEXT: linalg.copy(%[[SECOND_TUPLE_SECOND_ELEM]], %[[RESULT0]]) +// CHECK-NEXT: linalg.copy(%[[ARG2]], %[[RESULT1]]) +// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]] diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index bc26a8659831d..f03c953396a4a 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -1669,7 +1669,7 @@ def TableGenBuildOp5 : TEST_Op<"tblgen_build_5", let results = (outs AnyType:$result); let extraClassDeclaration = [{ - static LogicalResult 
inferReturnTypes(MLIRContext *, + static LogicalResult inferReturnTypes(MLIRContext *, Optional location, ValueRange operands, DictionaryAttr attributes, RegionRange regions, SmallVectorImpl &inferredReturnTypes) { @@ -1679,4 +1679,31 @@ def TableGenBuildOp5 : TEST_Op<"tblgen_build_5", }]; } +//===----------------------------------------------------------------------===// +// Test BufferPlacement +//===----------------------------------------------------------------------===// + +def GetTupleElementOp: TEST_Op<"get_tuple_element"> { + let description = [{ + Test op that returns a specified element of the tuple. + }]; + + let arguments = (ins + TupleOf<[AnyType]>, + I32Attr:$index + ); + let results = (outs AnyType); +} + +def MakeTupleOp: TEST_Op<"make_tuple"> { + let description = [{ + Test op that creates a tuple value from a list of values. + }]; + + let arguments = (ins + Variadic:$inputs + ); + let results = (outs TupleOf<[AnyType]>); +} + #endif // TEST_OPS diff --git a/mlir/test/lib/Transforms/TestBufferPlacement.cpp b/mlir/test/lib/Transforms/TestBufferPlacement.cpp index 6cc0924191cb8..14b72b9fc92a0 100644 --- a/mlir/test/lib/Transforms/TestBufferPlacement.cpp +++ b/mlir/test/lib/Transforms/TestBufferPlacement.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "TestDialect.h" +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/IR/Function.h" #include "mlir/IR/Operation.h" @@ -109,14 +111,16 @@ struct TestBufferPlacementPreparationPass void populateTensorLinalgToBufferLinalgConversionPattern( MLIRContext *context, BufferAssignmentPlacer *placer, - TypeConverter *converter, OwningRewritePatternList *patterns) { + BufferAssignmentTypeConverter *converter, + OwningRewritePatternList *patterns) { populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp, - 
allowMemrefFunctionResults>(context, placer, converter, patterns); + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, placer, + converter, patterns); patterns->insert(context, placer, converter); } void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); registry.insert(); } @@ -127,6 +131,8 @@ struct TestBufferPlacementPreparationPass // Mark all Standard operations legal. target.addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); // Mark all Linalg operations illegal as long as they work on tensors. auto isLegalOperation = [&](Operation *op) { @@ -149,6 +155,42 @@ struct TestBufferPlacementPreparationPass converter.isLegal(&funcOp.getBody()); }); + auto kind = allowMemrefFunctionResults + ? BufferAssignmentTypeConverter::KeepAsFunctionResult + : BufferAssignmentTypeConverter::AppendToArgumentsList; + converter.setResultConversionKind(kind); + converter.setResultConversionKind( + kind); + + converter.addDecomposeTypeConversion( + [](TupleType tupleType, SmallVectorImpl &types) { + tupleType.getFlattenedTypes(types); + return success(); + }); + + converter.addArgumentMaterialization( + [](OpBuilder &builder, TupleType resultType, ValueRange inputs, + Location loc) -> Optional { + if (inputs.size() == 1) + return llvm::None; + TypeRange TypeRange = inputs.getTypes(); + SmallVector types(TypeRange.begin(), TypeRange.end()); + TupleType tuple = TupleType::get(types, builder.getContext()); + mlir::Value value = builder.create(loc, tuple, inputs); + return value; + }); + + converter.addDecomposeValueConversion([](OpBuilder &builder, Location loc, + TupleType resultType, Value value, + SmallVectorImpl &values) { + for (unsigned i = 0, e = resultType.size(); i < e; ++i) { + Value res = builder.create( + loc, resultType.getType(i), value, builder.getI32IntegerAttr(i)); + values.push_back(res); + } + return success(); + }); + // Walk over all the functions to apply buffer assignment. 
this->getOperation().walk([&](FuncOp function) -> WalkResult { OwningRewritePatternList patterns; From 888049b97a7495ba669020522bcae6691287bd9a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 2 Sep 2020 16:53:41 +0100 Subject: [PATCH 014/465] [X86][SSE] Fold vselect(pshufb,pshufb) -> or(pshufb,pshufb) If the PSHUFBs have no other uses, then we can force the unselected elements to zero to OR them instead, avoiding both an extra mask load and a costly variable blend. Eventually we should try to bring this into shuffle combining, once we can more easily convert between shuffles + select patterns. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 57 ++++++-- .../CodeGen/X86/vector-shuffle-256-v16.ll | 21 ++- .../CodeGen/X86/vector-shuffle-256-v32.ll | 132 ++++++++---------- .../CodeGen/X86/vector-shuffle-combining.ll | 29 ++-- 4 files changed, 127 insertions(+), 112 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 674e3d88ae890..2af3d743728ee 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10888,20 +10888,25 @@ static bool isTargetShuffleEquivalent(ArrayRef Mask, // Attempt to create a shuffle mask from a VSELECT condition mask. 
static bool createShuffleMaskFromVSELECT(SmallVectorImpl &Mask, SDValue Cond) { - if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) + EVT CondVT = Cond.getValueType(); + unsigned EltSizeInBits = CondVT.getScalarSizeInBits(); + unsigned NumElts = CondVT.getVectorNumElements(); + + APInt UndefElts; + SmallVector EltBits; + if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits, + true, false)) return false; - unsigned Size = Cond.getValueType().getVectorNumElements(); - Mask.resize(Size, SM_SentinelUndef); + Mask.resize(NumElts, SM_SentinelUndef); - for (int i = 0; i != (int)Size; ++i) { - SDValue CondElt = Cond.getOperand(i); + for (int i = 0; i != (int)NumElts; ++i) { Mask[i] = i; // Arbitrarily choose from the 2nd operand if the select condition element // is undef. // TODO: Can we do better by matching patterns such as even/odd? - if (CondElt.isUndef() || isNullConstant(CondElt)) - Mask[i] += Size; + if (UndefElts[i] || EltBits[i].isNullValue()) + Mask[i] += NumElts; } return true; @@ -18139,9 +18144,11 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, // Only non-legal VSELECTs reach this lowering, convert those into generic // shuffles and re-use the shuffle lowering path for blends. - SmallVector Mask; - if (createShuffleMaskFromVSELECT(Mask, Cond)) - return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask); + if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { + SmallVector Mask; + if (createShuffleMaskFromVSELECT(Mask, Cond)) + return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask); + } return SDValue(); } @@ -40270,6 +40277,36 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask); } + // fold vselect(cond, pshufb(x), pshufb(y)) -> or (pshufb(x), pshufb(y)) + // by forcing the unselected elements to zero. + // TODO: Can we handle more shuffles with this? 
+ if (N->getOpcode() == ISD::VSELECT && CondVT.isVector() && + LHS.getOpcode() == X86ISD::PSHUFB && RHS.getOpcode() == X86ISD::PSHUFB && + LHS.hasOneUse() && RHS.hasOneUse()) { + MVT SimpleVT = VT.getSimpleVT(); + bool LHSUnary, RHSUnary; + SmallVector LHSOps, RHSOps; + SmallVector LHSMask, RHSMask, CondMask; + if (createShuffleMaskFromVSELECT(CondMask, Cond) && + getTargetShuffleMask(LHS.getNode(), SimpleVT, true, LHSOps, LHSMask, + LHSUnary) && + getTargetShuffleMask(RHS.getNode(), SimpleVT, true, RHSOps, RHSMask, + RHSUnary)) { + int NumElts = VT.getVectorNumElements(); + for (int i = 0; i != NumElts; ++i) { + if (CondMask[i] < NumElts) + RHSMask[i] = 0x80; + else + LHSMask[i] = 0x80; + } + LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0), + getConstVector(LHSMask, SimpleVT, DAG, DL, true)); + RHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, RHS.getOperand(0), + getConstVector(RHSMask, SimpleVT, DAG, DL, true)); + return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); + } + } + // If we have SSE[12] support, try to form min/max nodes. 
SSE min/max // instructions match the semantics of the common C idiom x @PR24935(<16 x i16> %a, <16 x i16> %b) { ; ; AVX2-SLOW-LABEL: PR24935: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[8,9,8,9,8,9,8,9,0,1,14,15,12,13,0,1,24,25,24,25,24,25,24,25,16,17,30,31,28,29,16,17] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17] ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,4,5,4,5,4,5,22,23,20,21,16,17,26,27,20,21,20,21,20,21,20,21] -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0> -; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX2-SLOW-NEXT: vpor %ymm2, %ymm1, %ymm1 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15] ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,5,5,8,9,10,11,13,13,13,13] ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] @@ -7294,11 +7293,10 @@ define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) { ; ; AVX2-FAST-LABEL: PR24935: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[8,9,8,9,8,9,8,9,0,1,14,15,12,13,0,1,24,25,24,25,24,25,24,25,16,17,30,31,28,29,16,17] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17] ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,4,5,4,5,4,5,22,23,20,21,16,17,26,27,20,21,20,21,20,21,20,21] -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = 
<255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0> -; AVX2-FAST-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX2-FAST-NEXT: vpor %ymm2, %ymm1, %ymm1 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,2,3,2,3,u,u,10,11,u,u,u,u,u,u,u,u,18,19,18,19,u,u,26,27,u,u,u,u,u,u] ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u] @@ -7330,11 +7328,10 @@ define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) { ; ; XOPAVX2-LABEL: PR24935: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[8,9,8,9,8,9,8,9,0,1,14,15,12,13,0,1,24,25,24,25,24,25,24,25,16,17,30,31,28,29,16,17] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,0,1,10,11,4,5,4,5,4,5,4,5,22,23,20,21,16,17,26,27,20,21,20,21,20,21,20,21] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0> -; XOPAVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; XOPAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm2 = ymm0[1,1,1,1,4,5,6,7,9,9,9,9,12,13,14,15] ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,5,5,5,5,8,9,10,11,13,13,13,13] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 82d1997cddfa9..e06b75355f6fb 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -2891,18 +2891,16 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_ ; ; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u] -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u] -; AVX512VLBW-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31] +; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: @@ -2922,10 +2920,9 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 
x i32> ret <32 x i8> %shuffle @@ -2943,18 +2940,16 @@ define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_ ; ; AVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u] -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero +; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u] -; AVX512VLBW-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23] +; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: @@ -2974,10 +2969,9 @@ define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_ ; ; XOPAVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero +; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq %shuffle = 
shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -3368,11 +3362,10 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_ ; ; AVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u] +; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,u,u,255,255,0,255,u,u,u,255,255,u,0,0,u,u,255,u,255,255,0,0,255,0,255,u,0,0,0,0> -; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23] +; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] @@ -3383,19 +3376,17 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_ ; ; AVX512VLBW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1] -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u] -; AVX512VLBW-NEXT: movl $-222248896, %eax # imm = 
0xF2C0C040 -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm2[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] -; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7] +; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13,u,u,3,3],zero,ymm1[8,u,u,u,12,1,u],zero,zero,ymm1[u,u,20,u,17,22],zero,zero,ymm1[16],zero,ymm1[27,u],zero,zero,zero,zero +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23] +; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX512VLBW-NEXT: movl $134948620, %eax # imm = 0x80B270C ; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1} -; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLBW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1} ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: @@ -3422,11 +3413,10 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_ ; ; XOPAVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = 
ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,u,u,255,255,0,255,u,u,u,255,255,u,0,0,u,u,255,u,255,255,0,0,255,0,255,u,0,0,0,0> -; XOPAVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23] +; XOPAVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] @@ -4415,11 +4405,10 @@ define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_ ; ; AVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7],zero,zero,zero,zero,ymm0[24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] -; AVX2-NEXT: vpblendvb %ymm2, %ymm1, 
%ymm0, %ymm0 +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7,24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31],zero,zero +; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: @@ -4438,11 +4427,10 @@ define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7],zero,zero,zero,zero,ymm0[24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7,24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31],zero,zero +; XOPAVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -4879,17 +4867,16 @@ define <32 x i8> @shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_ define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) { ; AVX1-LABEL: PR28136: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: 
vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[8,u,10,u,12,u,14,u,9,u,11,u,13,u,15,u] -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[u,8,u,10,u,12,u,14,u,9,u,11,u,13,u,15] -; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,u,2,u,4,u,6,u,1,u,3,u,5,u,7,u] -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,0,u,2,u,4,u,6,u,1,u,3,u,5,u,7] -; AVX1-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,xmm1[8],zero,xmm1[10],zero,xmm1[12],zero,xmm1[14],zero,xmm1[9],zero,xmm1[11],zero,xmm1[13],zero,xmm1[15] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] +; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm0[8],zero,xmm0[10],zero,xmm0[12],zero,xmm0[14],zero,xmm0[9],zero,xmm0[11],zero,xmm0[13],zero,xmm0[15],zero +; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm1[0],zero,xmm1[2],zero,xmm1[4],zero,xmm1[6],zero,xmm1[1],zero,xmm1[3],zero,xmm1[5],zero,xmm1[7] +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],zero,xmm0[4],zero,xmm0[6],zero,xmm0[1],zero,xmm0[3],zero,xmm0[5],zero,xmm0[7],zero +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, 
%ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -4921,15 +4908,12 @@ define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) { ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[8,u,10,u,12,u,14,u,9,u,11,u,13,u,15,u] -; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[u,8,u,10,u,12,u,14,u,9,u,11,u,13,u,15] -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; XOPAVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm3, %xmm2 -; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,u,2,u,4,u,6,u,1,u,3,u,5,u,7,u] +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm1[u,8,u,10,u,12,u,14,u,9,u,11,u,13,u,15] +; XOPAVX1-NEXT: vpperm {{.*#+}} xmm2 = xmm0[8],xmm2[1],xmm0[10],xmm2[3],xmm0[12],xmm2[5],xmm0[14],xmm2[7],xmm0[9],xmm2[9],xmm0[11],xmm2[11],xmm0[13],xmm2[13],xmm0[15],xmm2[15] ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,0,u,2,u,4,u,6,u,1,u,3,u,5,u,7] -; XOPAVX1-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm0 +; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7],xmm0[1],xmm1[9],xmm0[3],xmm1[11],xmm0[5],xmm1[13],xmm0[7],xmm1[15] ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; XOPAVX1-NEXT: retq ; @@ -4958,11 +4942,10 @@ define <32 x i8> @PR47262(<4 x i64> %a0) { ; ; AVX2-LABEL: PR47262: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = 
ymm0[0,4,u,u,1,5,u,u,2,6,u,u,3,7,u,u,u,u,24,28,u,u,25,29,u,u,26,30,u,u,27,31] +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7],zero,zero,zero,zero,ymm0[24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,4,u,u,1,5,u,u,2,6,u,u,3,7,24,28,u,u,25,29,u,u,26,30,u,u,27,31,u,u] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] -; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7,24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31],zero,zero +; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: PR47262: @@ -4992,11 +4975,10 @@ define <32 x i8> @PR47262(<4 x i64> %a0) { ; ; XOPAVX2-LABEL: PR47262: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4,u,u,1,5,u,u,2,6,u,u,3,7,u,u,u,u,24,28,u,u,25,29,u,u,26,30,u,u,27,31] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7],zero,zero,zero,zero,ymm0[24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31] ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,4,u,u,1,5,u,u,2,6,u,u,3,7,24,28,u,u,25,29,u,u,26,30,u,u,27,31,u,u] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7,24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31],zero,zero +; XOPAVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 ; XOPAVX2-NEXT: retq 
%t1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> %t2 = bitcast <4 x i64> %t1 to <32 x i8> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index 6420a62ff0baf..5b3b1d4fba183 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -3281,22 +3281,21 @@ define void @PR45604(<32 x i16>* %dst, <8 x i16>* %src) { ; ; AVX2-FAST-LABEL: PR45604: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa (%rsi), %xmm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,4,5,u,u,u,u,2,3,6,7,u,u,u,u,16,17,20,21,u,u,u,u,18,19,22,23,u,u,u,u] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[4,5,0,1,u,u,u,u,6,7,2,3,u,u,u,u,20,21,16,17,u,u,u,u,22,23,18,19,u,u,u,u] -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = [0,0,0,0,0,0,0,0,11,11,11,11,11,11,11,11] -; AVX2-FAST-NEXT: vpblendvb %ymm4, {{.*}}(%rip), %ymm5, %ymm4 -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = <255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255,u,u,u,u> -; AVX2-FAST-NEXT: vpblendvb %ymm5, %ymm1, %ymm3, %ymm1 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,12,13,u,u,u,u,10,11,14,15,u,u,u,u,24,25,28,29,u,u,u,u,26,27,30,31,u,u,u,u] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[12,13,8,9,u,u,u,u,14,15,10,11,u,u,u,u,28,29,24,25,u,u,u,u,30,31,26,27,u,u,u,u] -; AVX2-FAST-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm0 -; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm4[1],ymm1[2],ymm4[3],ymm1[4],ymm4[5],ymm1[6],ymm4[7] -; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm4[1],ymm0[2],ymm4[3],ymm0[4],ymm4[5],ymm0[6],ymm4[7] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm0 = +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,11,11,11,11,11,11,11,11] +; AVX2-FAST-NEXT: vpblendvb %ymm0, {{.*}}(%rip), %ymm1, %ymm0 +; AVX2-FAST-NEXT: vmovdqa (%rsi), %xmm1 +; AVX2-FAST-NEXT: 
vpshufb {{.*#+}} ymm2 = ymm1[0,1],zero,zero,ymm1[u,u,u,u,2,3],zero,zero,ymm1[u,u,u,u],zero,zero,ymm1[20,21,u,u,u,u],zero,zero,ymm1[22,23,u,u,u,u] +; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm3 = ymm1[2,3,0,1] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm4 = zero,zero,ymm3[0,1,u,u,u,u],zero,zero,ymm3[2,3,u,u,u,u,20,21],zero,zero,ymm3[u,u,u,u,22,23],zero,zero,ymm3[u,u,u,u] +; AVX2-FAST-NEXT: vpor %ymm4, %ymm2, %ymm2 +; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm0[1],ymm2[2],ymm0[3],ymm2[4],ymm0[5],ymm2[6],ymm0[7] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[8,9],zero,zero,ymm1[u,u,u,u,10,11],zero,zero,ymm1[u,u,u,u],zero,zero,ymm1[28,29,u,u,u,u],zero,zero,ymm1[30,31,u,u,u,u] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm3 = zero,zero,ymm3[8,9,u,u,u,u],zero,zero,ymm3[10,11,u,u,u,u,28,29],zero,zero,ymm3[u,u,u,u,30,31],zero,zero,ymm3[u,u,u,u] +; AVX2-FAST-NEXT: vpor %ymm3, %ymm1, %ymm1 +; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] ; AVX2-FAST-NEXT: vmovdqu %ymm0, 32(%rdi) -; AVX2-FAST-NEXT: vmovdqu %ymm1, (%rdi) +; AVX2-FAST-NEXT: vmovdqu %ymm2, (%rdi) ; AVX2-FAST-NEXT: vzeroupper ; AVX2-FAST-NEXT: retq %v1 = load <8 x i16>, <8 x i16>* %src, align 16 From ddd48cdba690fdeefc6ad02a912b63bdb66401b4 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Wed, 2 Sep 2020 12:11:29 -0400 Subject: [PATCH 015/465] [ms] [llvm-ml] Add support for line continuations in MASM Add support for line continuations (the "backslash operator") in MASM by modifying the Parser's Lex method. 
Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D83347 --- llvm/lib/MC/MCParser/MasmParser.cpp | 8 ++++++++ llvm/test/tools/llvm-ml/line_continuations.test | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 llvm/test/tools/llvm-ml/line_continuations.test diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index fb7aaae295dfa..45165ffe3cac0 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -1094,6 +1094,14 @@ const AsmToken &MasmParser::Lex() { tok = &Lexer.Lex(); } + // Recognize and bypass line continuations. + while (tok->is(AsmToken::BackSlash) && + Lexer.peekTok().is(AsmToken::EndOfStatement)) { + // Eat both the backslash and the end of statement. + Lexer.Lex(); + tok = &Lexer.Lex(); + } + if (tok->is(AsmToken::Eof)) { // If this is the end of an included file, pop the parent file off the // include stack. diff --git a/llvm/test/tools/llvm-ml/line_continuations.test b/llvm/test/tools/llvm-ml/line_continuations.test new file mode 100644 index 0000000000000..604bbe91b32af --- /dev/null +++ b/llvm/test/tools/llvm-ml/line_continuations.test @@ -0,0 +1,17 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.code + +t1: +mov eax, \ + ebx +# CHECK: t1: +# CHECK-NEXT: mov eax, ebx + +t2: +mov eax, [ebx + \ + 1] +# CHECK: t2: +# CHECK-NEXT: mov eax, dword ptr [ebx + 1] + +END From 8ff44e644bb70dfb8decc397a42679df6e6f8ba1 Mon Sep 17 00:00:00 2001 From: Erik Pilkington Date: Tue, 1 Sep 2020 11:52:28 -0400 Subject: [PATCH 016/465] [IRGen] Fix an assert when __attribute__((used)) is used on an ObjC method This assert doesn't really make sense for functions in general, since they start life as declarations, and there isn't really any reason to require them to be defined before attributes are applied to them. 
rdar://67895846 --- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- clang/test/CodeGenObjC/attr-used-on-method.m | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenObjC/attr-used-on-method.m diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 77a5079bd0f1c..1f362e2b6b318 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1989,7 +1989,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, } void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { - assert(!GV->isDeclaration() && + assert(isa(GV) || !GV->isDeclaration() && "Only globals with definition can force usage."); LLVMUsed.emplace_back(GV); } diff --git a/clang/test/CodeGenObjC/attr-used-on-method.m b/clang/test/CodeGenObjC/attr-used-on-method.m new file mode 100644 index 0000000000000..d8b2a5d291841 --- /dev/null +++ b/clang/test/CodeGenObjC/attr-used-on-method.m @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.10 %s -S -emit-llvm -o - | FileCheck %s + +// CHECK: @llvm.used = +// CHECK-SAME: @"\01-[X m]" + +// CHECK: define internal void @"\01-[X m]"( + +@interface X @end +@implementation X +-(void) m __attribute__((used)) {} +@end From d46f2c51e4c849683434bb5a0fb6164957474b8f Mon Sep 17 00:00:00 2001 From: Erik Pilkington Date: Tue, 1 Sep 2020 20:17:00 -0400 Subject: [PATCH 017/465] Make -fvisibility-inlines-hidden apply to static local variables in inline functions on Darwin This effectively disables r340386 on Darwin, and provides a command line flag to opt into/out of this behaviour. This change is needed to compile certain Apple headers correctly. 
rdar://47688592 Differential revision: https://reviews.llvm.org/D86881 --- clang/include/clang/Basic/LangOptions.def | 3 ++ clang/include/clang/Driver/Options.td | 11 ++++ clang/lib/AST/Decl.cpp | 3 +- clang/lib/Driver/ToolChains/Clang.cpp | 2 + clang/lib/Driver/ToolChains/Darwin.cpp | 7 +++ clang/lib/Frontend/CompilerInvocation.cpp | 3 ++ ...bility-inlines-hidden-static-local-var.cpp | 53 +++++++++++++++++++ clang/test/Driver/darwin-objc-options.m | 9 ++++ 8 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenCXX/visibility-inlines-hidden-static-local-var.cpp diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index d7bba5426c2a7..55a784196bb9c 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -265,6 +265,9 @@ BENIGN_LANGOPT(DumpRecordLayoutsSimple , 1, 0, "dumping the layout of IRgen'd re BENIGN_LANGOPT(DumpVTableLayouts , 1, 0, "dumping the layouts of emitted vtables") LANGOPT(NoConstantCFStrings , 1, 0, "no constant CoreFoundation strings") BENIGN_LANGOPT(InlineVisibilityHidden , 1, 0, "hidden visibility for inline C++ methods") +BENIGN_LANGOPT(VisibilityInlinesHiddenStaticLocalVar, 1, 0, + "hidden visibility for static local variables in inline C++ " + "methods when -fvisibility-inlines hidden is enabled") LANGOPT(GlobalAllocationFunctionVisibilityHidden , 1, 0, "hidden visibility for global operator new and delete declaration") BENIGN_LANGOPT(ParseUnknownAnytype, 1, 0, "__unknown_anytype") BENIGN_LANGOPT(DebuggerSupport , 1, 0, "debugger support") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index ff7b4aa9320cc..5a6a196191e7f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1943,6 +1943,17 @@ def fvisibility_EQ : Joined<["-"], "fvisibility=">, Group, def fvisibility_inlines_hidden : Flag<["-"], "fvisibility-inlines-hidden">, Group, 
HelpText<"Give inline C++ member functions hidden visibility by default">, Flags<[CC1Option]>; +def fvisibility_inlines_hidden_static_local_var : + Flag<["-"], "fvisibility-inlines-hidden-static-local-var">, Group, + HelpText<"When -fvisibility-inlines-hidden is enabled, static variables in " + "inline C++ member functions will also be given hidden visibility " + "by default">, + Flags<[CC1Option]>; +def fno_visibility_inlines_hidden_static_local_var : + Flag<["-"], "fno-visibility-inlines-hidden-static-local-var">, Group, + HelpText<"Disables -fvisibility-inlines-hidden-static-local-var " + "(this is the default on non-darwin targets)">, + Flags<[CC1Option]>; def fvisibility_ms_compat : Flag<["-"], "fvisibility-ms-compat">, Group, HelpText<"Give global types 'default' visibility and global functions and " "variables 'hidden' visibility by default">; diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 5c0a98815dd79..9815f0648ad76 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1299,7 +1299,8 @@ LinkageInfo LinkageComputer::getLVForLocalDecl(const NamedDecl *D, // we should not make static local variables in the function hidden. 
LV = getLVForDecl(FD, computation); if (isa(D) && useInlineVisibilityHidden(FD) && - !LV.isVisibilityExplicit()) { + !LV.isVisibilityExplicit() && + !Context.getLangOpts().VisibilityInlinesHiddenStaticLocalVar) { assert(cast(D)->isStaticLocal()); // If this was an implicitly hidden inline method, check again for // explicit visibility on the parent class, and use that for static locals diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3023c94bf10c4..bd5a89c2360cd 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5210,6 +5210,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden); + Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden_static_local_var, + options::OPT_fno_visibility_inlines_hidden_static_local_var); Args.AddLastArg(CmdArgs, options::OPT_fvisibility_global_new_delete_hidden); Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ); diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 04349ff6af984..9d22cda217116 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -2408,6 +2408,13 @@ void Darwin::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, // Enable compatibility mode for NSItemProviderCompletionHandler in // Foundation/NSItemProvider.h. CC1Args.push_back("-fcompatibility-qualified-id-block-type-checking"); + + // Give static local variables in inline functions hidden visibility when + // -fvisibility-inlines-hidden is enabled. 
+ if (!DriverArgs.getLastArgNoClaim( + options::OPT_fvisibility_inlines_hidden_static_local_var, + options::OPT_fno_visibility_inlines_hidden_static_local_var)) + CC1Args.push_back("-fvisibility-inlines-hidden-static-local-var"); } DerivedArgList * diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 1cd392f650099..9143dd6ca2576 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2766,6 +2766,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, if (Args.hasArg(OPT_fvisibility_inlines_hidden)) Opts.InlineVisibilityHidden = 1; + if (Args.hasArg(OPT_fvisibility_inlines_hidden_static_local_var)) + Opts.VisibilityInlinesHiddenStaticLocalVar = 1; + if (Args.hasArg(OPT_fvisibility_global_new_delete_hidden)) Opts.GlobalAllocationFunctionVisibilityHidden = 1; diff --git a/clang/test/CodeGenCXX/visibility-inlines-hidden-static-local-var.cpp b/clang/test/CodeGenCXX/visibility-inlines-hidden-static-local-var.cpp new file mode 100644 index 0000000000000..57e6dea72e21f --- /dev/null +++ b/clang/test/CodeGenCXX/visibility-inlines-hidden-static-local-var.cpp @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -fvisibility-inlines-hidden -fvisibility-inlines-hidden-static-local-var %s -emit-llvm -o - | FileCheck %s + +#define used __attribute__((used)) + +used inline void f1() { + // CHECK: @_ZZ2f1vE6f1_var = linkonce_odr hidden global i32 0 + static int f1_var = 0; +} + +__attribute__((visibility("default"))) +used inline void f2() { + // CHECK: @_ZZ2f2vE6f2_var = linkonce_odr global i32 0 + static int f2_var = 0; +} + +struct S { + used void f3() { + // CHECK: @_ZZN1S2f3EvE6f3_var = linkonce_odr hidden global i32 0 + static int f3_var = 0; + } + + void f6(); + void f7(); +}; + +used void f4() { + // CHECK: @_ZZ2f4vE6f4_var = internal global i32 0 + static int f4_var = 0; +} + +__attribute__((visibility("default"))) +used void f5() { + 
// CHECK: @_ZZ2f5vE6f5_var = internal global i32 0 + static int f5_var = 0; +} + +used void S::f6() { + // CHECK: @_ZZN1S2f6EvE6f6_var = internal global i32 0 + static int f6_var = 0; +} + +used inline void S::f7() { + // CHECK: @_ZZN1S2f7EvE6f7_var = linkonce_odr hidden global i32 0 + static int f7_var = 0; +} + + +struct __attribute__((visibility("default"))) S2 { + used void f8() { + // CHECK: @_ZZN2S22f8EvE6f8_var = linkonce_odr hidden global i32 0 + static int f8_var = 0; + } +}; diff --git a/clang/test/Driver/darwin-objc-options.m b/clang/test/Driver/darwin-objc-options.m index 6684a5272175b..8721fbc1ef1e2 100644 --- a/clang/test/Driver/darwin-objc-options.m +++ b/clang/test/Driver/darwin-objc-options.m @@ -46,3 +46,12 @@ // RUN: %clang -target x86_64-linux-gnu -### %s 2>&1 | FileCheck --check-prefix=OTHER_COMPATIBILITY %s // DARWIN_COMPATIBILITY: -fcompatibility-qualified-id-block-type-checking // OTHER_COMPATIBILITY-NOT: -fcompatibility-qualified-id-block-type-checking + +// Add -fvisibility-inlines-hidden-static-local-var on Darwin. 
+// RUN: %clang -target x86_64-apple-darwin10 -### %s 2>&1 | FileCheck --check-prefix=DARWIN_INLINES_HIDDEN %s +// RUN: %clang -target x86_64-apple-darwin10 -fno-visibility-inlines-hidden-static-local-var -### %s 2>&1 | FileCheck --check-prefix=DARWIN_INLINES_HIDDEN_EXPLICIT_NO %s +// RUN: %clang -target x86_64-linux-gnu -### %s 2>&1 | FileCheck --check-prefix=NO_DARWIN_INLINES_HIDDEN %s +// DARWIN_INLINES_HIDDEN: -fvisibility-inlines-hidden-static-local-var +// DARWIN_INLINES_HIDDEN_EXPLICIT_NO-NOT: -fvisibility-inlines-hidden-static-local-var +// DARWIN_INLINES_HIDDEN_EXPLICIT_NO: -fno-visibility-inlines-hidden-static-local-var +// NO_DARWIN_INLINES_HIDDEN-NOT: -fvisibility-inlines-hidden-static-local-var From 5201b962e8956b75dffd2167e278b8627981c90b Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 2 Sep 2020 10:36:48 -0400 Subject: [PATCH 018/465] [libc++] Re-apply the workaround for timespec_get not always being available in Apple SDKs This commit re-applies 99f3b231cb21, which was reverted in 814242572731 because it broke the modules build. The modules failure was a circular dependency between the Darwin module and __config. Specifically, the issue was that if <__config> includes a system header, the std_config module depends on the Darwin module. However, the Darwin module already depends on the std_config header because some of its headers include libc++ headers like (they mean to include the C , but libc++ headers are first in the header search path). This is fixed by moving the workaround to only. 
https://llvm.org/PR47208 rdar://68157284 --- libcxx/include/ctime | 14 +++++++++++++- .../timespec_get.xopen.compile.pass.cpp | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 libcxx/test/libcxx/language.support/timespec_get.xopen.compile.pass.cpp diff --git a/libcxx/include/ctime b/libcxx/include/ctime index f9f2f1659d0e0..3aa619daa3581 100644 --- a/libcxx/include/ctime +++ b/libcxx/include/ctime @@ -52,6 +52,18 @@ int timespec_get( struct timespec *ts, int base); // C++17 #pragma GCC system_header #endif +// FIXME: +// Apple SDKs don't define ::timespec_get unconditionally in C++ mode. This +// should be fixed in future SDKs, but for the time being we need to avoid +// trying to use that declaration when the SDK doesn't provide it. Note that +// we're detecting this here instead of in <__config> because we can't include +// system headers from <__config>, since it leads to circular module dependencies. +// This is also meant to be a very temporary workaround until the SDKs are fixed. 
+#include +#if defined(__APPLE__) && defined(_LIBCPP_HAS_TIMESPEC_GET) && (__DARWIN_C_LEVEL < __DARWIN_C_FULL) +# define _LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED +#endif + _LIBCPP_BEGIN_NAMESPACE_STD using ::clock_t; @@ -72,7 +84,7 @@ using ::gmtime; using ::localtime; #endif using ::strftime; -#if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_TIMESPEC_GET) +#if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_TIMESPEC_GET) && !defined(_LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED) using ::timespec_get; #endif diff --git a/libcxx/test/libcxx/language.support/timespec_get.xopen.compile.pass.cpp b/libcxx/test/libcxx/language.support/timespec_get.xopen.compile.pass.cpp new file mode 100644 index 0000000000000..cf4c5957a4183 --- /dev/null +++ b/libcxx/test/libcxx/language.support/timespec_get.xopen.compile.pass.cpp @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14 + +// Make sure that can be included even when _XOPEN_SOURCE is defined. +// This used to trigger some bug in Apple SDKs, since timespec_get was not +// defined in but we tried using it from . +// See https://llvm.org/PR47208 for details. 
+ +// ADDITIONAL_COMPILE_FLAGS: -D_XOPEN_SOURCE=500 + +#include From 44cc78da056fbda2693f0489275f8e0ff1f590a1 Mon Sep 17 00:00:00 2001 From: hyd-dev Date: Wed, 2 Sep 2020 12:22:29 -0400 Subject: [PATCH 019/465] [libc++] Fix incorrect usage of __STDC_HOSTED__ D56913 introduced the _LIBCPP_FREESTANDING macro and guarded its definition by: #ifndef __STDC_HOSTED__ # define _LIBCPP_FREESTANDING #endif However, __STDC_HOSTED__ is defined as 0 in freestanding implementations instead of undefined, which means that _LIBCPP_FREESTANDING would never get defined. This patch corrects the above as: #if __STDC_HOSTED__ == 0 # define _LIBCPP_FREESTANDING #endif Differential Revision: https://reviews.llvm.org/D86055 --- libcxx/include/__config | 2 +- libcxx/test/libcxx/libcpp_freestanding.sh.cpp | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 libcxx/test/libcxx/libcpp_freestanding.sh.cpp diff --git a/libcxx/include/__config b/libcxx/include/__config index d7b6a2acaefff..3e64694f284b6 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -38,7 +38,7 @@ # define _LIBCPP_ABI_VERSION 1 #endif -#ifndef __STDC_HOSTED__ +#if __STDC_HOSTED__ == 0 # define _LIBCPP_FREESTANDING #endif diff --git a/libcxx/test/libcxx/libcpp_freestanding.sh.cpp b/libcxx/test/libcxx/libcpp_freestanding.sh.cpp new file mode 100644 index 0000000000000..5a51f1be4e823 --- /dev/null +++ b/libcxx/test/libcxx/libcpp_freestanding.sh.cpp @@ -0,0 +1,21 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Test that _LIBCPP_FREESTANDING is not defined when -ffreestanding is not passed +// to the compiler but defined when -ffreestanding is passed to the compiler. + +// RUN: %{cxx} %{flags} %{compile_flags} -fsyntax-only %s +// RUN: %{cxx} %{flags} %{compile_flags} -fsyntax-only -ffreestanding -DFREESTANDING %s + +#include <__config> + +#if defined(FREESTANDING) != defined(_LIBCPP_FREESTANDING) +#error _LIBCPP_FREESTANDING should be defined in freestanding mode and not \ + defined in non-freestanding mode +#endif From 4f57a126c4e763e3041c04f0b22e91200506dcc6 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 2 Sep 2020 12:29:42 -0400 Subject: [PATCH 020/465] [libc++] Remove definition of _LIBCPP_ALIGNOF for GCC in C++03 mode That definition is known to be potentially incorrect, and we don't support GCC in C++03 mode anyway. --- libcxx/include/__config | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 3e64694f284b6..17e6bfe207aaf 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -398,9 +398,7 @@ #elif defined(_LIBCPP_COMPILER_CLANG) # define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp) #else -// This definition is potentially buggy, but it's only taken with GCC in C++03, -// which we barely support anyway. 
See llvm.org/PR39713 -# define _LIBCPP_ALIGNOF(_Tp) __alignof(_Tp) +# error "We don't know a correct way to implement alignof(T) in C++03 outside of Clang" #endif #define _LIBCPP_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp) From f80866bd4a2a4e8b650aef1f9a88870dd336e20f Mon Sep 17 00:00:00 2001 From: peter klausler Date: Tue, 1 Sep 2020 16:55:43 -0700 Subject: [PATCH 021/465] [flang] Make -fget-symbols-sources output deterministic The DumpSymbolsSources() routine ordered its output by the addresses of the names of the symbols, and was susceptible to variation across environments. Fixed by using a multimap using the values of the names. Differential Revision: https://reviews.llvm.org/D87035 --- flang/lib/Semantics/semantics.cpp | 6 +++--- flang/test/Semantics/getsymbols01.f90 | 6 +++--- flang/test/Semantics/getsymbols02.f90 | 2 +- flang/test/Semantics/getsymbols03-a.f90 | 2 +- flang/test/Semantics/getsymbols04.f90 | 2 +- flang/test/Semantics/getsymbols05.f90 | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 24d0baa9c2ae8..af5b120d9393a 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -45,17 +45,17 @@ namespace Fortran::semantics { -using NameToSymbolMap = std::map; +using NameToSymbolMap = std::multimap; static void DoDumpSymbols(llvm::raw_ostream &, const Scope &, int indent = 0); static void PutIndent(llvm::raw_ostream &, int indent); static void GetSymbolNames(const Scope &scope, NameToSymbolMap &symbols) { // Finds all symbol names in the scope without collecting duplicates. 
for (const auto &pair : scope) { - symbols.emplace(pair.second->name().begin(), *pair.second); + symbols.emplace(pair.second->name(), *pair.second); } for (const auto &pair : scope.commonBlocks()) { - symbols.emplace(pair.second->name().begin(), *pair.second); + symbols.emplace(pair.second->name(), *pair.second); } for (const auto &child : scope.children()) { GetSymbolNames(child, symbols); diff --git a/flang/test/Semantics/getsymbols01.f90 b/flang/test/Semantics/getsymbols01.f90 index 8f50304825dc7..bdb7bf053823a 100644 --- a/flang/test/Semantics/getsymbols01.f90 +++ b/flang/test/Semantics/getsymbols01.f90 @@ -16,10 +16,10 @@ recursive pure function f() result(x) end module ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s +! CHECK-COUNT-1:f:{{.*}}getsymbols01.f90, 12, 26-27 ! CHECK-COUNT-1:mm1:{{.*}}getsymbols01.f90, 2, 8-11 ! CHECK-COUNT-1:s:{{.*}}getsymbols01.f90, 5, 18-19 -! CHECK-COUNT-1:x:{{.*}}getsymbols01.f90, 5, 21-22 -! CHECK-COUNT-1:y:{{.*}}getsymbols01.f90, 5, 24-25 ! CHECK-COUNT-1:ss:{{.*}}getsymbols01.f90, 9, 19-21 -! CHECK-COUNT-1:f:{{.*}}getsymbols01.f90, 12, 26-27 +! CHECK-COUNT-1:x:{{.*}}getsymbols01.f90, 5, 21-22 ! CHECK-COUNT-1:x:{{.*}}getsymbols01.f90, 13, 24-25 +! CHECK-COUNT-1:y:{{.*}}getsymbols01.f90, 5, 24-25 diff --git a/flang/test/Semantics/getsymbols02.f90 b/flang/test/Semantics/getsymbols02.f90 index 4c8f0710eb23d..1eed3e922e826 100644 --- a/flang/test/Semantics/getsymbols02.f90 +++ b/flang/test/Semantics/getsymbols02.f90 @@ -10,5 +10,5 @@ PROGRAM helloworld ! RUN: %f18 -fparse-only %S/Inputs/getsymbols02-a.f90 ! RUN: %f18 -fparse-only %S/Inputs/getsymbols02-b.f90 ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s -! CHECK: get5: mm2a ! CHECK: callget5: mm2b +! 
CHECK: get5: mm2a diff --git a/flang/test/Semantics/getsymbols03-a.f90 b/flang/test/Semantics/getsymbols03-a.f90 index c11aee03048c9..980d6bc58c1a6 100644 --- a/flang/test/Semantics/getsymbols03-a.f90 +++ b/flang/test/Semantics/getsymbols03-a.f90 @@ -8,7 +8,7 @@ program main end program ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s -! CHECK:mm3:{{.*}}getsymbols03-b.f90, 1, 8-11 ! CHECK:f:{{.*}}getsymbols03-b.f90, 2, 12-13 ! CHECK:main:{{.*}}getsymbols03-a.f90, 4, 9-13 +! CHECK:mm3:{{.*}}getsymbols03-b.f90, 1, 8-11 ! CHECK:x:{{.*}}getsymbols03-a.f90, 6, 13-14 diff --git a/flang/test/Semantics/getsymbols04.f90 b/flang/test/Semantics/getsymbols04.f90 index 4decfc78560ad..fc9b177abd903 100644 --- a/flang/test/Semantics/getsymbols04.f90 +++ b/flang/test/Semantics/getsymbols04.f90 @@ -8,5 +8,5 @@ program main ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s ! CHECK:x:{{.*}}getsymbols04.f90, 3, 14-15 -! CHECK:y:{{.*}}getsymbols04.f90, 4, 14-15 ! CHECK:x:{{.*}}getsymbols04.f90, 5, 11-12 +! CHECK:y:{{.*}}getsymbols04.f90, 4, 14-15 diff --git a/flang/test/Semantics/getsymbols05.f90 b/flang/test/Semantics/getsymbols05.f90 index 30dcb2278e002..624f37a74b763 100644 --- a/flang/test/Semantics/getsymbols05.f90 +++ b/flang/test/Semantics/getsymbols05.f90 @@ -11,5 +11,5 @@ program main ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s ! CHECK:x:{{.*}}getsymbols05.f90, 3, 14-15 -! CHECK:y:{{.*}}getsymbols05.f90, 4, 14-15 ! CHECK:x:{{.*}}getsymbols05.f90, 6, 16-17 +! 
CHECK:y:{{.*}}getsymbols05.f90, 4, 14-15 From ecde200209f82b7362277ea59ad84df66c42dd3b Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Wed, 2 Sep 2020 19:42:18 +0300 Subject: [PATCH 022/465] [AMDGPU][MC] Corrected parser to avoid generation of excessive error messages Summary of changes: - Changed parser to eliminate generation of excessive error messages; - Corrected lit tests to match all expected error messages; - Corrected lit tests to guard against unwanted extra messages (added option "--implicit-check-not=error:"); - Added missing checks and fixed some typos in tests. See bug 46907: https://bugs.llvm.org/show_bug.cgi?id=46907 Reviewers: arsenm, rampitec Differential Revision: https://reviews.llvm.org/D86940 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 141 ++-- llvm/lib/Target/AMDGPU/SIDefines.h | 3 +- llvm/test/MC/AMDGPU/add-sub-no-carry.s | 4 +- llvm/test/MC/AMDGPU/atomic-fadd-insts.s | 2 +- llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s | 2 +- llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s | 4 +- llvm/test/MC/AMDGPU/dl-insts-err.s | 102 +-- llvm/test/MC/AMDGPU/dpp-err.s | 28 +- llvm/test/MC/AMDGPU/ds-err.s | 16 +- llvm/test/MC/AMDGPU/ds-gfx9.s | 2 +- llvm/test/MC/AMDGPU/ds.s | 10 +- llvm/test/MC/AMDGPU/exp-err.s | 4 +- llvm/test/MC/AMDGPU/exp-gfx10.s | 4 +- llvm/test/MC/AMDGPU/expressions-gfx10.s | 2 +- llvm/test/MC/AMDGPU/expressions-gfx9.s | 2 +- llvm/test/MC/AMDGPU/expressions.s | 10 +- llvm/test/MC/AMDGPU/flat-gfx10.s | 2 +- llvm/test/MC/AMDGPU/flat-gfx9.s | 4 +- llvm/test/MC/AMDGPU/flat-global.s | 106 +-- .../MC/AMDGPU/flat-scratch-instructions.s | 62 +- llvm/test/MC/AMDGPU/flat-scratch.s | 6 +- llvm/test/MC/AMDGPU/flat.s | 188 ++--- llvm/test/MC/AMDGPU/fma-mix.s | 34 +- llvm/test/MC/AMDGPU/gfx10-constant-bus.s | 4 +- llvm/test/MC/AMDGPU/gfx1011_err.s | 36 +- llvm/test/MC/AMDGPU/gfx1030_err.s | 96 +-- llvm/test/MC/AMDGPU/gfx10_asm_all.s | 4 +- llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s | 4 +- llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s | 4 +- 
llvm/test/MC/AMDGPU/gfx10_asm_err.s | 14 +- llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s | 2 +- llvm/test/MC/AMDGPU/gfx8_asm_all.s | 2 +- llvm/test/MC/AMDGPU/gfx9-asm-err.s | 2 +- llvm/test/MC/AMDGPU/gfx9-vop2be-literal.s | 2 +- llvm/test/MC/AMDGPU/gfx9_asm_all.s | 2 +- .../AMDGPU/invalid-instructions-spellcheck.s | 4 - llvm/test/MC/AMDGPU/lds_direct-err.s | 2 +- llvm/test/MC/AMDGPU/lds_direct-gfx10.s | 2 +- llvm/test/MC/AMDGPU/literal16-err.s | 2 +- llvm/test/MC/AMDGPU/literals.s | 101 +-- llvm/test/MC/AMDGPU/literalv216-err.s | 7 +- llvm/test/MC/AMDGPU/literalv216.s | 4 +- llvm/test/MC/AMDGPU/mad-mix.s | 36 +- llvm/test/MC/AMDGPU/mai-err.s | 177 ++++- llvm/test/MC/AMDGPU/mai.s | 2 +- llvm/test/MC/AMDGPU/mimg-err.s | 6 +- llvm/test/MC/AMDGPU/mimg.s | 14 +- llvm/test/MC/AMDGPU/mtbuf-gfx10.s | 2 +- llvm/test/MC/AMDGPU/mtbuf.s | 6 +- llvm/test/MC/AMDGPU/mubuf-gfx9.s | 22 +- llvm/test/MC/AMDGPU/mubuf.s | 6 +- llvm/test/MC/AMDGPU/out-of-range-registers.s | 20 +- llvm/test/MC/AMDGPU/reg-syntax-err.s | 42 +- llvm/test/MC/AMDGPU/reg-syntax-extra.s | 74 +- llvm/test/MC/AMDGPU/regression/bug28538.s | 12 +- llvm/test/MC/AMDGPU/smem-err.s | 2 +- llvm/test/MC/AMDGPU/smem.s | 159 +++-- llvm/test/MC/AMDGPU/smrd-err.s | 11 +- llvm/test/MC/AMDGPU/smrd.s | 6 +- llvm/test/MC/AMDGPU/sop1-err.s | 6 +- llvm/test/MC/AMDGPU/sop1.s | 86 ++- llvm/test/MC/AMDGPU/sop2-err.s | 2 +- llvm/test/MC/AMDGPU/sop2.s | 102 ++- llvm/test/MC/AMDGPU/sopc-err.s | 2 +- llvm/test/MC/AMDGPU/sopc.s | 34 +- llvm/test/MC/AMDGPU/sopk-err.s | 67 +- llvm/test/MC/AMDGPU/sopk.s | 8 +- llvm/test/MC/AMDGPU/sopp-err.s | 19 +- llvm/test/MC/AMDGPU/sopp.s | 12 +- llvm/test/MC/AMDGPU/trap.s | 8 +- llvm/test/MC/AMDGPU/vintrp-err.s | 4 +- llvm/test/MC/AMDGPU/vop-err.s | 4 +- llvm/test/MC/AMDGPU/vop1-gfx9-err.s | 6 +- llvm/test/MC/AMDGPU/vop1-gfx9.s | 6 +- llvm/test/MC/AMDGPU/vop1.s | 8 +- llvm/test/MC/AMDGPU/vop2-err.s | 4 +- llvm/test/MC/AMDGPU/vop2.s | 8 +- llvm/test/MC/AMDGPU/vop3-convert.s | 8 +- 
llvm/test/MC/AMDGPU/vop3-errs.s | 8 +- llvm/test/MC/AMDGPU/vop3-gfx9.s | 281 ++++++-- llvm/test/MC/AMDGPU/vop3-literal.s | 31 +- llvm/test/MC/AMDGPU/vop3-modifiers-err.s | 2 +- llvm/test/MC/AMDGPU/vop3.s | 67 +- llvm/test/MC/AMDGPU/vop3p-err.s | 10 +- llvm/test/MC/AMDGPU/vop_dpp.s | 320 ++++----- llvm/test/MC/AMDGPU/vop_sdwa.s | 648 +++++++++--------- llvm/test/MC/AMDGPU/vopc-errs.s | 6 +- llvm/test/MC/AMDGPU/vopc-vi.s | 4 +- llvm/test/MC/AMDGPU/wave32.s | 4 +- llvm/test/MC/AMDGPU/xdl-insts-err.s | 4 +- llvm/test/MC/AMDGPU/xnack-mask.s | 10 +- 91 files changed, 2026 insertions(+), 1392 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 2833875e438cd..db74f8a54c0af 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1340,7 +1340,6 @@ class AMDGPUAsmParser : public MCTargetAsmParser { const int64_t Width, const SMLoc Loc); - void errorExpTgt(); OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; @@ -4705,22 +4704,18 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, if (getLexer().is(AsmToken::Comma)) Parser.Lex(); - switch (Res) { - case MatchOperand_Success: break; - case MatchOperand_ParseFail: + if (Res != MatchOperand_Success) { + if (!Parser.hasPendingError()) { // FIXME: use real operand location rather than the current location. - Error(getLexer().getLoc(), "failed parsing operand."); - while (!getLexer().is(AsmToken::EndOfStatement)) { - Parser.Lex(); - } - return true; - case MatchOperand_NoMatch: - // FIXME: use real operand location rather than the current location. 
- Error(getLexer().getLoc(), "not a valid operand."); - while (!getLexer().is(AsmToken::EndOfStatement)) { - Parser.Lex(); - } - return true; + StringRef Msg = + (Res == MatchOperand_ParseFail) ? "failed parsing operand." : + "not a valid operand."; + Error(getLexer().getLoc(), Msg); + } + while (!getLexer().is(AsmToken::EndOfStatement)) { + Parser.Lex(); + } + return true; } } @@ -5004,8 +4999,10 @@ AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, } if (Dfmt == DFMT_UNDEF) { Error(Loc, "duplicate numeric format"); - } else if (Nfmt == NFMT_UNDEF){ + return MatchOperand_ParseFail; + } else if (Nfmt == NFMT_UNDEF) { Error(Loc, "duplicate data format"); + return MatchOperand_ParseFail; } } @@ -5014,8 +5011,10 @@ AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, if (isGFX10()) { auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); - if (Ufmt == UFMT_UNDEF) + if (Ufmt == UFMT_UNDEF) { Error(FormatLoc, "unsupported format"); + return MatchOperand_ParseFail; + } Format = Ufmt; } else { Format = encodeDfmtNfmt(Dfmt, Nfmt); @@ -5077,7 +5076,9 @@ AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { if (Res != MatchOperand_Success) return Res; - skipToken(AsmToken::RBrac, "expected a closing square bracket"); + if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) + return MatchOperand_ParseFail; + return MatchOperand_Success; } @@ -5119,7 +5120,10 @@ AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { trySkipToken(AsmToken::Comma); if (!FormatFound) { - if (parseSymbolicOrNumericFormat(Format) == MatchOperand_Success) { + Res = parseSymbolicOrNumericFormat(Format); + if (Res == MatchOperand_ParseFail) + return Res; + if (Res == MatchOperand_Success) { auto Size = Operands.size(); AMDGPUOperand &Op = static_cast(*Operands[Size - 2]); assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); @@ -5340,12 +5344,14 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { int64_t Waitcnt = getWaitcntBitMask(ISA); 
SMLoc S = getLoc(); - // If parse failed, do not return error code - // to avoid excessive error messages. if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { - while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); + while (!isToken(AsmToken::EndOfStatement)) { + if (!parseCnt(Waitcnt)) + return MatchOperand_ParseFail; + } } else { - parseExpr(Waitcnt); + if (!parseExpr(Waitcnt)) + return MatchOperand_ParseFail; } Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); @@ -5419,8 +5425,6 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { int64_t ImmVal = 0; SMLoc Loc = getLoc(); - // If parse failed, do not return error code - // to avoid excessive error messages. if (trySkipId("hwreg", AsmToken::LParen)) { OperandInfoTy HwReg(ID_UNKNOWN_); int64_t Offset = OFFSET_DEFAULT_; @@ -5428,10 +5432,16 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { if (parseHwregBody(HwReg, Offset, Width) && validateHwreg(HwReg, Offset, Width, Loc)) { ImmVal = encodeHwreg(HwReg.Id, Offset, Width); + } else { + return MatchOperand_ParseFail; } } else if (parseExpr(ImmVal)) { - if (ImmVal < 0 || !isUInt<16>(ImmVal)) + if (ImmVal < 0 || !isUInt<16>(ImmVal)) { Error(Loc, "invalid immediate: only 16-bit values are legal"); + return MatchOperand_ParseFail; + } + } else { + return MatchOperand_ParseFail; } Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); @@ -5518,8 +5528,6 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { int64_t ImmVal = 0; SMLoc Loc = getLoc(); - // If parse failed, do not return error code - // to avoid excessive error messages. 
if (trySkipId("sendmsg", AsmToken::LParen)) { OperandInfoTy Msg(ID_UNKNOWN_); OperandInfoTy Op(OP_NONE_); @@ -5527,10 +5535,16 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { if (parseSendMsgBody(Msg, Op, Stream) && validateSendMsg(Msg, Op, Stream, Loc)) { ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); + } else { + return MatchOperand_ParseFail; } } else if (parseExpr(ImmVal)) { - if (ImmVal < 0 || !isUInt<16>(ImmVal)) + if (ImmVal < 0 || !isUInt<16>(ImmVal)) { Error(Loc, "invalid immediate: only 16-bit values are legal"); + return MatchOperand_ParseFail; + } + } else { + return MatchOperand_ParseFail; } Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); @@ -5594,7 +5608,7 @@ OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { Parser.Lex(); if (Attr > 63) { Error(S, "out of bounds attr"); - return MatchOperand_Success; + return MatchOperand_ParseFail; } SMLoc SChan = SMLoc::getFromPointer(Chan.data()); @@ -5610,10 +5624,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { // exp //===----------------------------------------------------------------------===// -void AMDGPUAsmParser::errorExpTgt() { - Error(Parser.getTok().getLoc(), "invalid exp target"); -} - OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, uint8_t &Val) { if (Str == "null") { @@ -5631,8 +5641,10 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, if (Str.getAsInteger(10, Val)) return MatchOperand_ParseFail; - if (Val > 7) - errorExpTgt(); + if (Val > 7) { + Error(getLoc(), "invalid exp target"); + return MatchOperand_ParseFail; + } return MatchOperand_Success; } @@ -5642,8 +5654,10 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, if (Str.getAsInteger(10, Val)) return MatchOperand_ParseFail; - if (Val > 4 || (Val == 4 && !isGFX10())) - errorExpTgt(); + if (Val > 4 || (Val == 4 && !isGFX10())) { + Error(getLoc(), "invalid 
exp target"); + return MatchOperand_ParseFail; + } Val += 12; return MatchOperand_Success; @@ -5659,8 +5673,10 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, if (Str.getAsInteger(10, Val)) return MatchOperand_ParseFail; - if (Val >= 32) - errorExpTgt(); + if (Val >= 32) { + Error(getLoc(), "invalid exp target"); + return MatchOperand_ParseFail; + } Val += 32; return MatchOperand_Success; @@ -5671,8 +5687,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, if (Str.getAsInteger(10, Val)) return MatchOperand_ParseFail; - errorExpTgt(); - return MatchOperand_Success; + Error(getLoc(), "invalid exp target"); + return MatchOperand_ParseFail; } return MatchOperand_NoMatch; @@ -6107,12 +6123,12 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() { Error(S, (Imm == 0)? "expected a VGPR index mode or a closing parenthesis" : "expected a VGPR index mode"); - break; + return UNDEF; } if (Imm & Mode) { Error(S, "duplicate VGPR index mode"); - break; + return UNDEF; } Imm |= Mode; @@ -6120,7 +6136,7 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() { break; if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) - break; + return UNDEF; } return Imm; @@ -6129,6 +6145,8 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() { OperandMatchResultTy AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { + using namespace llvm::AMDGPU::VGPRIndexMode; + int64_t Imm = 0; SMLoc S = Parser.getTok().getLoc(); @@ -6139,15 +6157,16 @@ AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { Parser.Lex(); Parser.Lex(); - // If parse failed, trigger an error but do not return error code - // to avoid excessive error messages. 
Imm = parseGPRIdxMacro(); + if (Imm == UNDEF) + return MatchOperand_ParseFail; } else { if (getParser().parseAbsoluteExpression(Imm)) - return MatchOperand_NoMatch; + return MatchOperand_ParseFail; if (Imm < 0 || !isUInt<4>(Imm)) { Error(S, "invalid immediate: only 4-bit values are legal"); + return MatchOperand_ParseFail; } } @@ -6173,22 +6192,22 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { if (isRegister() || isModifier()) return MatchOperand_NoMatch; - if (parseExpr(Operands)) { + if (!parseExpr(Operands)) + return MatchOperand_ParseFail; - AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); - assert(Opr.isImm() || Opr.isExpr()); - SMLoc Loc = Opr.getStartLoc(); + AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); + assert(Opr.isImm() || Opr.isExpr()); + SMLoc Loc = Opr.getStartLoc(); - // Currently we do not support arbitrary expressions as branch targets. - // Only labels and absolute expressions are accepted. - if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { - Error(Loc, "expected an absolute expression or a label"); - } else if (Opr.isImm() && !Opr.isS16Imm()) { - Error(Loc, "expected a 16-bit signed jump offset"); - } + // Currently we do not support arbitrary expressions as branch targets. + // Only labels and absolute expressions are accepted. 
+ if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { + Error(Loc, "expected an absolute expression or a label"); + } else if (Opr.isImm() && !Opr.isS16Imm()) { + Error(Loc, "expected a 16-bit signed jump offset"); } - return MatchOperand_Success; // avoid excessive error messages + return MatchOperand_Success; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 58d77f3b224b5..d6013baf0f36e 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -217,7 +217,8 @@ enum EncBits : unsigned { SRC1_ENABLE = 1 << ID_SRC1, SRC2_ENABLE = 1 << ID_SRC2, DST_ENABLE = 1 << ID_DST, - ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE + ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE, + UNDEF = 0xFFFF }; } // namespace VGPRIndexMode diff --git a/llvm/test/MC/AMDGPU/add-sub-no-carry.s b/llvm/test/MC/AMDGPU/add-sub-no-carry.s index 8398199a89568..884d1dd850722 100644 --- a/llvm/test/MC/AMDGPU/add-sub-no-carry.s +++ b/llvm/test/MC/AMDGPU/add-sub-no-carry.s @@ -1,7 +1,7 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI --implicit-check-not=error: %s // FIXME: pre-gfx9 errors should be more useful diff --git a/llvm/test/MC/AMDGPU/atomic-fadd-insts.s b/llvm/test/MC/AMDGPU/atomic-fadd-insts.s index a0a516e4d772b..70014c6fafc46 100644 --- a/llvm/test/MC/AMDGPU/atomic-fadd-insts.s +++ b/llvm/test/MC/AMDGPU/atomic-fadd-insts.s @@ -1,5 +1,5 @@ // RUN: not 
llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck --check-prefix=GFX908 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX908-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck --check-prefix=GFX908-ERR --implicit-check-not=error: %s buffer_atomic_add_f32 v5, off, s[8:11], s3 offset:4095 // GFX908: encoding: [0xff,0x0f,0x34,0xe1,0x00,0x05,0x02,0x03] diff --git a/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s b/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s index 196dcada2ebea..86efb1883339b 100644 --- a/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s +++ b/llvm/test/MC/AMDGPU/buf-fmt-d16-packed.s @@ -1,7 +1,7 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=PACKED %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=PACKED %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding 2>&1 %s | FileCheck -check-prefix=UNPACKED-ERR -check-prefix=GCN-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji 2>&1 %s | FileCheck -check-prefix=UNPACKED-ERR -check-prefix=GCN-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s b/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s index 15cfb225b8b55..f8e6407c0548e 100644 --- a/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s +++ b/llvm/test/MC/AMDGPU/buf-fmt-d16-unpacked.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 2>&1 %s | FileCheck 
-check-prefix=PACKED-ERR -check-prefix=GCN-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/dl-insts-err.s b/llvm/test/MC/AMDGPU/dl-insts-err.s index 3f6d4fd861102..efdf079d8b889 100644 --- a/llvm/test/MC/AMDGPU/dl-insts-err.s +++ b/llvm/test/MC/AMDGPU/dl-insts-err.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx800 -show-encoding %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx800 %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908 // // Test unsupported GPUs. @@ -44,17 +44,17 @@ v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[] v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. 
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0,0,0,0] @@ -72,17 +72,17 @@ v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[] v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. 
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,0,0,0,0] @@ -100,17 +100,17 @@ v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[] v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_lo value. v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0,0,0] @@ -128,17 +128,17 @@ v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[] v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. 
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid neg_hi value. v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,0,0,0,0] @@ -156,17 +156,17 @@ v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[] v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. 
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0,0,0,0] @@ -184,17 +184,17 @@ v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[] v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,0,0,0,0] @@ -216,17 +216,17 @@ v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[] v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[,] // GFX906-GFX908: error: unknown token in expression v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. 
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0,0,0,0] @@ -246,15 +246,15 @@ v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[,] v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[,0] // GFX906-GFX908: error: invalid op_sel_hi value v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[2,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[2,2] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,-1] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[-1,0] -// GFX906-GFX908: error: failed parsing operand +// GFX906-GFX908: error: invalid op_sel_hi value. 
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[-1,-1] // GFX906-GFX908: error: expected a closing square bracket v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,0,0,0,0] diff --git a/llvm/test/MC/AMDGPU/dpp-err.s b/llvm/test/MC/AMDGPU/dpp-err.s index a3ab0f38abf7b..19d896d82d592 100644 --- a/llvm/test/MC/AMDGPU/dpp-err.s +++ b/llvm/test/MC/AMDGPU/dpp-err.s @@ -1,38 +1,38 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX89-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GFX89 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX89 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GFX89-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX89-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s v_mov_b32_dpp v0, v1 row_share:1 row_mask:0x1 bank_mask:0x1 -// GFX89-ERR: not a valid operand. +// GFX89-ERR: error: not a valid operand. 
// GFX10: v_mov_b32_dpp v0, v1 row_share:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x51,0x01,0x11] v_mov_b32_dpp v0, v1 row_xmask:1 row_mask:0x1 bank_mask:0x1 -// GFX89-ERR: not a valid operand. +// GFX89-ERR: error: not a valid operand. // GFX10: v_mov_b32_dpp v0, v1 row_xmask:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x61,0x01,0x11] v_mov_b32_dpp v0, v1 wave_shl:1 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 wave_shl:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x30,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 wave_shr:1 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 wave_shr:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x38,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 wave_rol:1 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 wave_rol:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x34,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 wave_ror:1 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 wave_ror:1 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x3c,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 row_bcast:15 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 row_bcast:15 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x42,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. v_mov_b32_dpp v0, v1 row_bcast:31 row_mask:0x1 bank_mask:0x1 // GFX89: v0, v1 row_bcast:31 row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x43,0x01,0x11] -// GFX10-ERR: not a valid operand. +// GFX10-ERR: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/ds-err.s b/llvm/test/MC/AMDGPU/ds-err.s index 7ed4080246a03..507bcbc1c4da9 100644 --- a/llvm/test/MC/AMDGPU/ds-err.s +++ b/llvm/test/MC/AMDGPU/ds-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --implicit-check-not=error: %s // offset too big // CHECK: error: invalid operand for instruction @@ -18,19 +18,19 @@ ds_write2_b32 v2, v4, v6 offset0:4 offset0:8 ds_write2_b32 v2, v4, v6 offset1:4 offset1:8 // offset0 too big -// CHECK: invalid operand for instruction +// CHECK: error: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset0:1000000000 // offset0 too big -// CHECK: invalid operand for instruction +// CHECK: error: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset0:0x100 // offset1 too big -// CHECK: invalid operand for instruction +// CHECK: error: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset1:1000000000 // offset1 too big -// CHECK: invalid operand for instruction +// CHECK: error: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset1:0x100 //===----------------------------------------------------------------------===// @@ -40,7 +40,7 @@ ds_write2_b32 v2, v4, v6 offset1:0x100 // CHECK: error: expected a colon ds_swizzle_b32 v8, v2 offset -// CHECK: error: failed parsing operand +// CHECK: error: unknown token in expression ds_swizzle_b32 v8, v2 offset: // CHECK: error: expected a colon @@ -121,5 +121,5 @@ ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "ppii") // CHECK: error: expected a 5-character mask ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "pppiii") -// CHECK: invalid mask +// CHECK: error: invalid mask ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "pppi2") diff --git a/llvm/test/MC/AMDGPU/ds-gfx9.s 
b/llvm/test/MC/AMDGPU/ds-gfx9.s index 810ccb018e855..2ed2f953b0ca8 100644 --- a/llvm/test/MC/AMDGPU/ds-gfx9.s +++ b/llvm/test/MC/AMDGPU/ds-gfx9.s @@ -1,5 +1,5 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s ds_read_u8_d16 v8, v2 // GFX9: ds_read_u8_d16 v8, v2 ; encoding: [0x00,0x00,0xac,0xd8,0x02,0x00,0x00,0x08] diff --git a/llvm/test/MC/AMDGPU/ds.s b/llvm/test/MC/AMDGPU/ds.s index 70f52972a81c8..25c3cdd38830b 100644 --- a/llvm/test/MC/AMDGPU/ds.s +++ b/llvm/test/MC/AMDGPU/ds.s @@ -3,9 +3,9 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=CI --check-prefix=SICI // RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOCI --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOCI --check-prefix=NOSICI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Checks for 16-bit Offsets @@ -16,11 +16,11 @@ ds_add_u32 v2, v4 offset:16 // VI: ds_add_u32 v2, 
v4 offset:16 ; encoding: [0x10,0x00,0x00,0xd8,0x02,0x04,0x00,0x00] ds_add_src2_f32 v255 offset:65535 -// NOSICI: error +// NOSICI: error: not a valid operand. // VI: ds_add_src2_f32 v255 offset:65535 ; encoding: [0xff,0xff,0x2a,0xd9,0xff,0x00,0x00,0x00] ds_add_src2_f32 v0 offset:4 gds -// NOSICI: error +// NOSICI: error: not a valid operand. // VI: ds_add_src2_f32 v0 offset:4 gds ; encoding: [0x04,0x00,0x2b,0xd9,0x00,0x00,0x00,0x00] //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/exp-err.s b/llvm/test/MC/AMDGPU/exp-err.s index 22d3edf0e0318..b3494a11fa08b 100644 --- a/llvm/test/MC/AMDGPU/exp-err.s +++ b/llvm/test/MC/AMDGPU/exp-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN --implicit-check-not=error: %s exp mrt8 v3, v2, v1, v0 // GCN: :5: error: invalid exp target diff --git a/llvm/test/MC/AMDGPU/exp-gfx10.s b/llvm/test/MC/AMDGPU/exp-gfx10.s index e207c5f0ede3c..2a02cef542ee3 100644 --- a/llvm/test/MC/AMDGPU/exp-gfx10.s +++ b/llvm/test/MC/AMDGPU/exp-gfx10.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=verde -show-encoding %s 2>&1 | FileCheck -check-prefix=SI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=verde %s 2>&1 | FileCheck -check-prefix=SI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=VI --implicit-check-not=error: %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s exp prim v1, off, off, off diff --git 
a/llvm/test/MC/AMDGPU/expressions-gfx10.s b/llvm/test/MC/AMDGPU/expressions-gfx10.s index b3f051b819b7f..8c413879a3c02 100644 --- a/llvm/test/MC/AMDGPU/expressions-gfx10.s +++ b/llvm/test/MC/AMDGPU/expressions-gfx10.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck %s --check-prefix=GFX10 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck -check-prefix=NOGFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck -check-prefix=NOGFX10 --implicit-check-not=error: %s i1=1 diff --git a/llvm/test/MC/AMDGPU/expressions-gfx9.s b/llvm/test/MC/AMDGPU/expressions-gfx9.s index a52887596af60..5419c8ed5cb9f 100644 --- a/llvm/test/MC/AMDGPU/expressions-gfx9.s +++ b/llvm/test/MC/AMDGPU/expressions-gfx9.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: //===----------------------------------------------------------------------===// // Relocatable expressions cannot be used with SDWA modifiers. 
diff --git a/llvm/test/MC/AMDGPU/expressions.s b/llvm/test/MC/AMDGPU/expressions.s index 37fe08a52d1ba..57f47d8f0345d 100644 --- a/llvm/test/MC/AMDGPU/expressions.s +++ b/llvm/test/MC/AMDGPU/expressions.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Floating-point expressions are not supported @@ -52,10 +52,10 @@ v_mad_f16 v5, v1, v2, |hm1| // Only primary expressions are allowed v_ceil_f32 v1, |1+i1| -// NOVI: failed parsing operand +// NOVI: error: expected vertical bar v_ceil_f32 v1, |i1+1| -// NOVI: failed parsing operand +// NOVI: error: expected vertical bar //===----------------------------------------------------------------------===// // Constant expressions may be used with 'abs' and 'neg' modifiers. @@ -327,8 +327,8 @@ v_sin_f32 v0, -[ttmp0] s1000=1 v_sin_f32 v0, -s1000 -// NOVI: failed parsing operand +// NOVI: error: not a valid operand. xnack_mask_lo=1 v_sin_f32 v0, xnack_mask_lo -// NOVI: failed parsing operand +// NOVI: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/flat-gfx10.s b/llvm/test/MC/AMDGPU/flat-gfx10.s index bf728d1618be9..90229630cfe72 100644 --- a/llvm/test/MC/AMDGPU/flat-gfx10.s +++ b/llvm/test/MC/AMDGPU/flat-gfx10.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s flat_load_dword v1, v[3:4] // GFX10: encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x7d,0x01] diff --git a/llvm/test/MC/AMDGPU/flat-gfx9.s b/llvm/test/MC/AMDGPU/flat-gfx9.s index bb6839a9b13f9..f0aff08fe6db0 100644 --- a/llvm/test/MC/AMDGPU/flat-gfx9.s +++ b/llvm/test/MC/AMDGPU/flat-gfx9.s @@ -1,8 +1,8 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s flat_load_dword v1, v[3:4] offset:0 diff --git a/llvm/test/MC/AMDGPU/flat-global.s b/llvm/test/MC/AMDGPU/flat-global.s index b771073407fe7..7a1d3333fb73d 100644 --- a/llvm/test/MC/AMDGPU/flat-global.s +++ b/llvm/test/MC/AMDGPU/flat-global.s @@ -1,14 +1,14 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 
-check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR --implicit-check-not=error: %s global_load_ubyte v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x20,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ubyte v1, v[3:4], off ; encoding: [0x00,0x80,0x40,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_ubyte v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x20,0xdc,0x03,0x00,0x7d,0x01] @@ -18,7 +18,7 @@ global_load_ubyte v1, v[3:4], off dlc global_load_sbyte v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x24,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_sbyte v1, v[3:4], off ; encoding: [0x00,0x80,0x44,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_sbyte v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x24,0xdc,0x03,0x00,0x7d,0x01] @@ -28,7 +28,7 @@ global_load_sbyte v1, v[3:4], off dlc global_load_ushort v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x28,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ushort v1, v[3:4], off ; encoding: 
[0x00,0x80,0x48,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_ushort v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x28,0xdc,0x03,0x00,0x7d,0x01] @@ -38,7 +38,7 @@ global_load_ushort v1, v[3:4], off dlc global_load_sshort v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x2c,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_sshort v1, v[3:4], off ; encoding: [0x00,0x80,0x4c,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_sshort v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x2c,0xdc,0x03,0x00,0x7d,0x01] @@ -48,7 +48,7 @@ global_load_sshort v1, v[3:4], off dlc global_load_dword v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dword v1, v[3:4], off ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_dword v1, v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x30,0xdc,0x03,0x00,0x7d,0x01] @@ -58,7 +58,7 @@ global_load_dword v1, v[3:4], off dlc global_load_dwordx2 v[1:2], v[3:4], off // GFX10: encoding: [0x00,0x80,0x34,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dwordx2 v[1:2], v[3:4], off ; encoding: [0x00,0x80,0x54,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_dwordx2 v[1:2], v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x34,0xdc,0x03,0x00,0x7d,0x01] @@ -68,7 +68,7 @@ global_load_dwordx2 v[1:2], v[3:4], off dlc global_load_dwordx3 v[1:3], v[3:4], off // GFX10: encoding: [0x00,0x80,0x3c,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dwordx3 v[1:3], v[3:4], off ; encoding: [0x00,0x80,0x58,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not 
supported on this GPU global_load_dwordx3 v[1:3], v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x3c,0xdc,0x03,0x00,0x7d,0x01] @@ -78,7 +78,7 @@ global_load_dwordx3 v[1:3], v[3:4], off dlc global_load_dwordx4 v[1:4], v[3:4], off // GFX10: encoding: [0x00,0x80,0x38,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dwordx4 v[1:4], v[3:4], off ; encoding: [0x00,0x80,0x5c,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_dwordx4 v[1:4], v[3:4], off dlc // GFX10: encoding: [0x00,0x90,0x38,0xdc,0x03,0x00,0x7d,0x01] @@ -119,7 +119,7 @@ global_load_dword v1, v[3:4] off, offset:-4097 global_store_byte v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_byte v[3:4], v1, off ; encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_byte v[3:4], v1, off dlc // GFX10: encoding: [0x00,0x90,0x60,0xdc,0x03,0x01,0x7d,0x00] @@ -129,7 +129,7 @@ global_store_byte v[3:4], v1, off dlc global_store_short v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_short v[3:4], v1, off ; encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_short v[3:4], v1, off dlc // GFX10: encoding: [0x00,0x90,0x68,0xdc,0x03,0x01,0x7d,0x00] @@ -139,7 +139,7 @@ global_store_short v[3:4], v1, off dlc global_store_dword v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dword v[3:4], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_dword v[3:4], v1, off dlc // GFX10: encoding: [0x00,0x90,0x70,0xdc,0x03,0x01,0x7d,0x00] 
@@ -149,7 +149,7 @@ global_store_dword v[3:4], v1, off dlc global_store_dwordx2 v[3:4], v[1:2], off // GFX10: encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dwordx2 v[3:4], v[1:2], off ; encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_dwordx2 v[3:4], v[1:2], off dlc // GFX10: encoding: [0x00,0x90,0x74,0xdc,0x03,0x01,0x7d,0x00] @@ -159,7 +159,7 @@ global_store_dwordx2 v[3:4], v[1:2], off dlc global_store_dwordx3 v[3:4], v[1:3], off // GFX10: encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dwordx3 v[3:4], v[1:3], off ; encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_dwordx3 v[3:4], v[1:3], off dlc // GFX10: encoding: [0x00,0x90,0x7c,0xdc,0x03,0x01,0x7d,0x00] @@ -169,7 +169,7 @@ global_store_dwordx3 v[3:4], v[1:3], off dlc global_store_dwordx4 v[3:4], v[1:4], off // GFX10: encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dwordx4 v[3:4], v[1:4], off ; encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_dwordx4 v[3:4], v[1:4], off dlc // GFX10: encoding: [0x00,0x90,0x78,0xdc,0x03,0x01,0x7d,0x00] @@ -179,32 +179,32 @@ global_store_dwordx4 v[3:4], v[1:4], off dlc global_store_dword v[3:4], v1, off offset:12 // GFX10: encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dword v[3:4], v1, off offset:12 ; encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: [[@LINE-3]]:36: error: not a valid operand +// VI-ERR: :36: error: not a valid operand global_load_dword v1, v3, s[2:3] // GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v3, s[2:3] ; encoding: 
[0x00,0x80,0x50,0xdc,0x03,0x00,0x02,0x01] -// VI-ERR: [[@LINE-3]]:1: error: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_dword v1, v3, s[2:3] offset:24 // GFX10: encoding: [0x18,0x80,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v3, s[2:3] offset:24 ; encoding: [0x18,0x80,0x50,0xdc,0x03,0x00,0x02,0x01] -// VI-ERR: [[@LINE-3]]:34: error: not a valid operand. +// VI-ERR: :34: error: not a valid operand. global_load_dword v1, v3, s[2:3] offset:-8 // GFX10: encoding: [0xf8,0x8f,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v3, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x50,0xdc,0x03,0x00,0x02,0x01] -// VI-ERR: [[@LINE-3]]:34: error: not a valid operand. +// VI-ERR: :34: error: not a valid operand. global_store_dword v3, v1, s[2:3] // GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] // GFX9: global_store_dword v3, v1, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_dword v3, v1, s[2:3] offset:24 // GFX10: encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] // GFX9: global_store_dword v3, v1, s[2:3] offset:24 ; encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] -// VI-ERR: [[@LINE-3]]:35: error: not a valid operand. +// VI-ERR: :35: error: not a valid operand. 
global_store_dword v3, v1, s[2:3] offset:-8 // GFX10: encoding: [0xf8,0x8f,0x70,0xdc,0x03,0x01,0x02,0x00] @@ -215,7 +215,7 @@ global_store_dword v3, v1, s[2:3] offset:-8 global_store_dword v3, v1, exec // GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00] // GFX9: global_store_dword v3, v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_dword v1, v[3:4], s2 // GFX10-ERR: error: invalid operand for instruction @@ -250,107 +250,107 @@ global_atomic_swap_x2 v[3:4], v[5:6], off global_atomic_add v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xc8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_add v[3:4], v5, off ; encoding: [0x00,0x80,0x08,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_sub v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xcc,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_sub v[3:4], v5, off ; encoding: [0x00,0x80,0x0c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_smin v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xd4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smin v[3:4], v5, off ; encoding: [0x00,0x80,0x10,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_umin v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xd8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umin v[3:4], v5, off ; encoding: [0x00,0x80,0x14,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_smax v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xdc,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smax v[3:4], v5, off ; encoding: 
[0x00,0x80,0x18,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_umax v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xe0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umax v[3:4], v5, off ; encoding: [0x00,0x80,0x1c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_and v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xe4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_and v[3:4], v5, off ; encoding: [0x00,0x80,0x20,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_or v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xe8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_or v[3:4], v5, off ; encoding: [0x00,0x80,0x24,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_xor v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xec,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_xor v[3:4], v5, off ; encoding: [0x00,0x80,0x28,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_inc v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xf0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_inc v[3:4], v5, off ; encoding: [0x00,0x80,0x2c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_dec v[3:4], v5, off // GFX10: encoding: [0x00,0x80,0xf4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_dec v[3:4], v5, off ; encoding: [0x00,0x80,0x30,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_add_x2 v[3:4], v[5:6], off // GFX10: 
encoding: [0x00,0x80,0x48,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_add_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x88,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_sub_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x4c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_sub_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x8c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_smin_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x54,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smin_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x90,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_umin_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x58,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umin_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x94,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_smax_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x5c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smax_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x98,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_umax_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x60,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umax_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x9c,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_and_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x64,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_and_x2 v[3:4], v[5:6], off ; 
encoding: [0x00,0x80,0xa0,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_or_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x68,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_or_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa4,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_xor_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x6c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_xor_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa8,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_inc_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x70,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_inc_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xac,0xdd,0x03,0x05,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_dec_x2 v[3:4], v[5:6], off // GFX10: encoding: [0x00,0x80,0x74,0xdd,0x03,0x05,0x7d,0x00] @@ -490,42 +490,42 @@ global_atomic_dec_x2 v[3:4], v[5:6], off offset:-16 global_load_ubyte_d16 v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ubyte_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_ubyte_d16_hi v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x84,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ubyte_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x84,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_sbyte_d16 v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x88,0xdc,0x03,0x00,0x7d,0x01] 
// GFX9: global_load_sbyte_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x88,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_sbyte_d16_hi v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x8c,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_sbyte_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x8c,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_short_d16 v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x90,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_short_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x90,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_load_short_d16_hi v1, v[3:4], off // GFX10: encoding: [0x00,0x80,0x94,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_short_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x94,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_byte_d16_hi v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_byte_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_store_short_d16_hi v[3:4], v1, off // GFX10: encoding: [0x00,0x80,0x6c,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_short_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x6c,0xdc,0x03,0x01,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU global_atomic_add v0, v[1:2], v2, off glc slc // GFX10: global_atomic_add v0, v[1:2], v2, off glc slc ; encoding: [0x00,0x80,0xcb,0xdc,0x01,0x02,0x7d,0x00] diff --git a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s 
b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s index c0e1670a6bd4f..fb795105419ce 100644 --- a/llvm/test/MC/AMDGPU/flat-scratch-instructions.s +++ b/llvm/test/MC/AMDGPU/flat-scratch-instructions.s @@ -1,14 +1,14 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s scratch_load_ubyte v1, v2, off // GFX10: encoding: [0x00,0x40,0x20,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ubyte v1, v2, off ; encoding: [0x00,0x40,0x40,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_ubyte v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x20,0xdc,0x02,0x00,0x7d,0x01] @@ -18,7 +18,7 @@ scratch_load_ubyte v1, v2, off dlc scratch_load_sbyte v1, v2, off // GFX10: encoding: [0x00,0x40,0x24,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sbyte v1, v2, off ; encoding: [0x00,0x40,0x44,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_sbyte v1, v2, 
off dlc // GFX10: encoding: [0x00,0x50,0x24,0xdc,0x02,0x00,0x7d,0x01] @@ -28,7 +28,7 @@ scratch_load_sbyte v1, v2, off dlc scratch_load_ushort v1, v2, off // GFX10: encoding: [0x00,0x40,0x28,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ushort v1, v2, off ; encoding: [0x00,0x40,0x48,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_ushort v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x28,0xdc,0x02,0x00,0x7d,0x01] @@ -38,7 +38,7 @@ scratch_load_ushort v1, v2, off dlc scratch_load_sshort v1, v2, off // GFX10: encoding: [0x00,0x40,0x2c,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sshort v1, v2, off ; encoding: [0x00,0x40,0x4c,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_sshort v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x2c,0xdc,0x02,0x00,0x7d,0x01] @@ -48,7 +48,7 @@ scratch_load_sshort v1, v2, off dlc scratch_load_dword v1, v2, off // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_dword v1, v2, off ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x30,0xdc,0x02,0x00,0x7d,0x01] @@ -58,7 +58,7 @@ scratch_load_dword v1, v2, off dlc scratch_load_dwordx2 v[1:2], v3, off // GFX10: encoding: [0x00,0x40,0x34,0xdc,0x03,0x00,0x7d,0x01] // GFX9: scratch_load_dwordx2 v[1:2], v3, off ; encoding: [0x00,0x40,0x54,0xdc,0x03,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dwordx2 v[1:2], v3, off dlc // GFX10: encoding: [0x00,0x50,0x34,0xdc,0x03,0x00,0x7d,0x01] @@ -68,7 +68,7 @@ scratch_load_dwordx2 v[1:2], v3, off dlc scratch_load_dwordx3 v[1:3], v4, off // GFX10: encoding: 
[0x00,0x40,0x3c,0xdc,0x04,0x00,0x7d,0x01] // GFX9: scratch_load_dwordx3 v[1:3], v4, off ; encoding: [0x00,0x40,0x58,0xdc,0x04,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dwordx3 v[1:3], v4, off dlc // GFX10: encoding: [0x00,0x50,0x3c,0xdc,0x04,0x00,0x7d,0x01] @@ -78,7 +78,7 @@ scratch_load_dwordx3 v[1:3], v4, off dlc scratch_load_dwordx4 v[1:4], v5, off // GFX10: encoding: [0x00,0x40,0x38,0xdc,0x05,0x00,0x7d,0x01] // GFX9: scratch_load_dwordx4 v[1:4], v5, off ; encoding: [0x00,0x40,0x5c,0xdc,0x05,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dwordx4 v[1:4], v5, off dlc // GFX10: encoding: [0x00,0x50,0x38,0xdc,0x05,0x00,0x7d,0x01] @@ -138,7 +138,7 @@ scratch_load_dword v255, off, s0 offset:2048 scratch_store_byte v1, v2, off // GFX10: encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_byte v1, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_byte v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x60,0xdc,0x01,0x02,0x7d,0x00] @@ -148,7 +148,7 @@ scratch_store_byte v1, v2, off dlc scratch_store_short v1, v2, off // GFX10: encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_short v1, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_short v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x68,0xdc,0x01,0x02,0x7d,0x00] @@ -158,7 +158,7 @@ scratch_store_short v1, v2, off dlc scratch_store_dword v1, v2, off // GFX10: encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dword v1, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not 
supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword v1, v2, off dlc // GFX10: encoding: [0x00,0x50,0x70,0xdc,0x01,0x02,0x7d,0x00] @@ -168,7 +168,7 @@ scratch_store_dword v1, v2, off dlc scratch_store_dwordx2 v1, v[2:3], off // GFX10: encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dwordx2 v1, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dwordx2 v1, v[2:3], off dlc // GFX10: encoding: [0x00,0x50,0x74,0xdc,0x01,0x02,0x7d,0x00] @@ -178,7 +178,7 @@ scratch_store_dwordx2 v1, v[2:3], off dlc scratch_store_dwordx3 v1, v[2:4], off // GFX10: encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dwordx3 v1, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dwordx3 v1, v[2:4], off dlc // GFX10: encoding: [0x00,0x50,0x7c,0xdc,0x01,0x02,0x7d,0x00] @@ -188,7 +188,7 @@ scratch_store_dwordx3 v1, v[2:4], off dlc scratch_store_dwordx4 v1, v[2:5], off // GFX10: encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dwordx4 v1, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7f,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dwordx4 v1, v[2:5], off dlc // GFX10: encoding: [0x00,0x50,0x78,0xdc,0x01,0x02,0x7d,0x00] @@ -203,7 +203,7 @@ scratch_store_dword v1, v2, off offset:12 scratch_load_dword v1, off, s1 // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x01,0x01] // GFX9: scratch_load_dword v1, off, s1 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x01,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, off, s1 offset:32 // GFX10: 
encoding: [0x20,0x40,0x30,0xdc,0x00,0x00,0x01,0x01] @@ -213,7 +213,7 @@ scratch_load_dword v1, off, s1 offset:32 scratch_store_dword off, v2, s1 // GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_dword off, v2, s1 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword off, v2, s1 offset:12 // GFX10: encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] @@ -254,59 +254,59 @@ scratch_store_dword off, v2, exec_hi scratch_load_dword v1, off, exec_lo // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7e,0x01] // GFX9: scratch_load_dword v1, off, exec_lo ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7e,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword off, v2, exec_lo // GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00] // GFX9: scratch_store_dword off, v2, exec_lo ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_dword v1, off, m0 // GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7c,0x01] // GFX9: scratch_load_dword v1, off, m0 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7c,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_dword off, v2, m0 // GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00] // GFX9: scratch_store_dword off, v2, m0 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_ubyte_d16 v1, v2, off // GFX10: encoding: [0x00,0x40,0x80,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ubyte_d16 v1, v2, off ; encoding: [0x00,0x40,0x80,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not 
supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_ubyte_d16_hi v1, v2, off // GFX10: encoding: [0x00,0x40,0x84,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ubyte_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x84,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_sbyte_d16 v1, v2, off // GFX10: encoding: [0x00,0x40,0x88,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sbyte_d16 v1, v2, off ; encoding: [0x00,0x40,0x88,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_sbyte_d16_hi v1, v2, off // GFX10: encoding: [0x00,0x40,0x8c,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sbyte_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x8c,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_short_d16 v1, v2, off // GFX10: encoding: [0x00,0x40,0x90,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_short_d16 v1, v2, off ; encoding: [0x00,0x40,0x90,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_load_short_d16_hi v1, v2, off // GFX10: encoding: [0x00,0x40,0x94,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_short_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x94,0xdc,0x02,0x00,0x7f,0x01] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_byte_d16_hi off, v2, s1 // GFX10: encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_byte_d16_hi off, v2, s1 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x01,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU scratch_store_short_d16_hi off, v2, s1 // GFX10: encoding: 
[0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_short_d16_hi off, v2, s1 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00] -// VI-ERR: instruction not supported on this GPU +// VI-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/flat-scratch.s b/llvm/test/MC/AMDGPU/flat-scratch.s index e87f59dafeeae..eea2f0d07f3ea 100644 --- a/llvm/test/MC/AMDGPU/flat-scratch.s +++ b/llvm/test/MC/AMDGPU/flat-scratch.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOCI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefix=NOCI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck -check-prefix=CI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI %s diff --git a/llvm/test/MC/AMDGPU/flat.s b/llvm/test/MC/AMDGPU/flat.s index 8351233e466b6..bfb71c9ebf4d2 100644 --- a/llvm/test/MC/AMDGPU/flat.s +++ b/llvm/test/MC/AMDGPU/flat.s @@ -7,44 +7,44 @@ // error: instruction not supported on this GPU // -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: +// 
RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Operands //===----------------------------------------------------------------------===// flat_load_dword v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01] flat_load_dword v1, v[3:4] glc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CI: flat_load_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x31,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x51,0xdc,0x03,0x00,0x00,0x01] flat_load_dword v1, v[3:4] glc slc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x00,0x01] flat_store_dword v[3:4], v1 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_dword v[3:4], v1 ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 glc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CIVI: flat_store_dword v[3:4], v1 glc ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 glc slc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CIVI: flat_store_dword v[3:4], v1 glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 slc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CIVI: flat_store_dword v[3:4], v1 slc ; encoding: 
[0x00,0x00,0x72,0xdc,0x03,0x01,0x00,0x00] // FIXME: For atomic instructions, glc must be placed immediately following @@ -53,12 +53,12 @@ flat_store_dword v[3:4], v1 slc // flat_atomic_add v1, v[3:4], v5 slc glc flat_atomic_add v1, v[3:4], v5 offset:0 glc slc -// NOSI: error: +// NOSI: error: not a valid operand. // CI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x00,0x01] flat_atomic_add v[3:4], v5 slc -// NOSI: error: +// NOSI: error: invalid operand for instruction // CI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0xca,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0x0a,0xdd,0x03,0x05,0x00,0x00] @@ -67,367 +67,367 @@ flat_atomic_add v[3:4], v5 slc //===----------------------------------------------------------------------===// flat_load_ubyte v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_ubyte v1, v[3:4] ; encoding: [0x00,0x00,0x20,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_ubyte v1, v[3:4] ; encoding: [0x00,0x00,0x40,0xdc,0x03,0x00,0x00,0x01] flat_load_sbyte v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_sbyte v1, v[3:4] ; encoding: [0x00,0x00,0x24,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_sbyte v1, v[3:4] ; encoding: [0x00,0x00,0x44,0xdc,0x03,0x00,0x00,0x01] flat_load_ushort v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_ushort v1, v[3:4] ; encoding: [0x00,0x00,0x28,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_ushort v1, v[3:4] ; encoding: [0x00,0x00,0x48,0xdc,0x03,0x00,0x00,0x01] flat_load_sshort v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_sshort v1, v[3:4] ; encoding: [0x00,0x00,0x2c,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_sshort v1, v[3:4] ; encoding: 
[0x00,0x00,0x4c,0xdc,0x03,0x00,0x00,0x01] flat_load_dword v1, v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01] flat_load_dwordx2 v[1:2], v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x00,0x34,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x00,0x54,0xdc,0x03,0x00,0x00,0x01] flat_load_dwordx4 v[5:8], v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x38,0xdc,0x03,0x00,0x00,0x05] // VI: flat_load_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x5c,0xdc,0x03,0x00,0x00,0x05] flat_load_dwordx3 v[5:7], v[3:4] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x3c,0xdc,0x03,0x00,0x00,0x05] // VI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x58,0xdc,0x03,0x00,0x00,0x05] flat_store_byte v[3:4], v1 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_byte v[3:4], v1 ; encoding: [0x00,0x00,0x60,0xdc,0x03,0x01,0x00,0x00] flat_store_short v[3:4], v1 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_short v[3:4], v1 ; encoding: [0x00,0x00,0x68,0xdc,0x03,0x01,0x00,0x00] flat_store_dword v[3:4], v1 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_dword v[3:4], v1 ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] flat_store_dwordx2 v[3:4], v[1:2] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CIVI: flat_store_dwordx2 v[3:4], v[1:2] ; encoding: [0x00,0x00,0x74,0xdc,0x03,0x01,0x00,0x00] flat_store_dwordx4 v[3:4], 
v[5:8] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_store_dwordx4 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00] // VI: flat_store_dwordx4 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00] flat_store_dwordx3 v[3:4], v[5:7] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_store_dwordx3 v[3:4], v[5:7] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00] // VI: flat_store_dwordx3 v[3:4], v[5:7] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00] flat_atomic_swap v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_swap v[3:4], v5 ; encoding: [0x00,0x00,0xc0,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_swap v[3:4], v5 ; encoding: [0x00,0x00,0x00,0xdd,0x03,0x05,0x00,0x00] flat_atomic_swap v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_swap v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xc1,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_swap v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x01,0xdd,0x03,0x05,0x00,0x01] flat_atomic_cmpswap v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_cmpswap v[3:4], v[5:6] ; encoding: [0x00,0x00,0xc4,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_cmpswap v[3:4], v[5:6] ; encoding: [0x00,0x00,0x04,0xdd,0x03,0x05,0x00,0x00] flat_atomic_cmpswap v1, v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_cmpswap v1, v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xc5,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_cmpswap v1, v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x05,0xdd,0x03,0x05,0x00,0x01] flat_atomic_add v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_add v[3:4], v5 ; encoding: [0x00,0x00,0xc8,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_add v[3:4], v5 ; encoding: 
[0x00,0x00,0x08,0xdd,0x03,0x05,0x00,0x00] flat_atomic_add v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_add v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xc9,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_add v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x09,0xdd,0x03,0x05,0x00,0x01] flat_atomic_sub v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_sub v[3:4], v5 ; encoding: [0x00,0x00,0xcc,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_sub v[3:4], v5 ; encoding: [0x00,0x00,0x0c,0xdd,0x03,0x05,0x00,0x00] flat_atomic_sub v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_sub v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xcd,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_sub v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x0d,0xdd,0x03,0x05,0x00,0x01] flat_atomic_smin v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smin v[3:4], v5 ; encoding: [0x00,0x00,0xd4,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_smin v[3:4], v5 ; encoding: [0x00,0x00,0x10,0xdd,0x03,0x05,0x00,0x00] flat_atomic_smin v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xd5,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_smin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x11,0xdd,0x03,0x05,0x00,0x01] flat_atomic_umin v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umin v[3:4], v5 ; encoding: [0x00,0x00,0xd8,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_umin v[3:4], v5 ; encoding: [0x00,0x00,0x14,0xdd,0x03,0x05,0x00,0x00] flat_atomic_umin v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xd9,0xdc,0x03,0x05,0x00,0x01] // VI: 
flat_atomic_umin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x15,0xdd,0x03,0x05,0x00,0x01] flat_atomic_smax v[3:4], v5, -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smax v[3:4], v5 ; encoding: [0x00,0x00,0xdc,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_smax v[3:4], v5 ; encoding: [0x00,0x00,0x18,0xdd,0x03,0x05,0x00,0x00] flat_atomic_smax v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xdd,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_smax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x19,0xdd,0x03,0x05,0x00,0x01] flat_atomic_umax v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umax v[3:4], v5 ; encoding: [0x00,0x00,0xe0,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_umax v[3:4], v5 ; encoding: [0x00,0x00,0x1c,0xdd,0x03,0x05,0x00,0x00] flat_atomic_umax v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe1,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_umax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x1d,0xdd,0x03,0x05,0x00,0x01] flat_atomic_and v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_and v[3:4], v5 ; encoding: [0x00,0x00,0xe4,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_and v[3:4], v5 ; encoding: [0x00,0x00,0x20,0xdd,0x03,0x05,0x00,0x00] flat_atomic_and v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_and v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe5,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_and v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x21,0xdd,0x03,0x05,0x00,0x01] flat_atomic_or v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_or v[3:4], v5 ; encoding: 
[0x00,0x00,0xe8,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_or v[3:4], v5 ; encoding: [0x00,0x00,0x24,0xdd,0x03,0x05,0x00,0x00] flat_atomic_or v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_or v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe9,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_or v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x25,0xdd,0x03,0x05,0x00,0x01] flat_atomic_xor v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_xor v[3:4], v5 ; encoding: [0x00,0x00,0xec,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_xor v[3:4], v5 ; encoding: [0x00,0x00,0x28,0xdd,0x03,0x05,0x00,0x00] flat_atomic_xor v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_xor v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xed,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_xor v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x29,0xdd,0x03,0x05,0x00,0x01] flat_atomic_inc v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_inc v[3:4], v5 ; encoding: [0x00,0x00,0xf0,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_inc v[3:4], v5 ; encoding: [0x00,0x00,0x2c,0xdd,0x03,0x05,0x00,0x00] flat_atomic_inc v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_inc v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xf1,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_inc v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x2d,0xdd,0x03,0x05,0x00,0x01] flat_atomic_dec v[3:4], v5 -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_dec v[3:4], v5 ; encoding: [0x00,0x00,0xf4,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_dec v[3:4], v5 ; encoding: [0x00,0x00,0x30,0xdd,0x03,0x05,0x00,0x00] flat_atomic_dec v1, v[3:4], v5 glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_dec v1, v[3:4], v5 glc ; 
encoding: [0x00,0x00,0xf5,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_dec v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x31,0xdd,0x03,0x05,0x00,0x01] flat_atomic_fcmpswap v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fcmpswap v[3:4], v[5:6] ; encoding: [0x00,0x00,0xf8,0xdc,0x03,0x05,0x00,0x00] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fcmpswap v1, v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fcmpswap v1, v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xf9,0xdc,0x03,0x05,0x00,0x01] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_swap_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_swap_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x40,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_swap_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x80,0xdd,0x03,0x05,0x00,0x00] flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x41,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x81,0xdd,0x03,0x05,0x00,0x01] flat_atomic_cmpswap_x2 v[3:4], v[5:8] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_cmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x44,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_cmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x84,0xdd,0x03,0x05,0x00,0x00] flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] glc ; encoding: [0x00,0x00,0x45,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] glc ; encoding: 
[0x00,0x00,0x85,0xdd,0x03,0x05,0x00,0x01] flat_atomic_add_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_add_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x48,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_add_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x88,0xdd,0x03,0x05,0x00,0x00] flat_atomic_add_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_add_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x49,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_add_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x89,0xdd,0x03,0x05,0x00,0x01] flat_atomic_sub_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_sub_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x4c,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_sub_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x8c,0xdd,0x03,0x05,0x00,0x00] flat_atomic_sub_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_sub_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x4d,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_sub_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x8d,0xdd,0x03,0x05,0x00,0x01] flat_atomic_smin_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x54,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_smin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x90,0xdd,0x03,0x05,0x00,0x00] flat_atomic_smin_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x55,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_smin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x91,0xdd,0x03,0x05,0x00,0x01] flat_atomic_umin_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: 
error: instruction not supported on this GPU // CI: flat_atomic_umin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x58,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_umin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x94,0xdd,0x03,0x05,0x00,0x00] flat_atomic_umin_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x59,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_umin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x95,0xdd,0x03,0x05,0x00,0x01] flat_atomic_smax_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x5c,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_smax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x98,0xdd,0x03,0x05,0x00,0x00] flat_atomic_smax_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_smax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x5d,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_smax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x99,0xdd,0x03,0x05,0x00,0x01] flat_atomic_umax_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x60,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_umax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x9c,0xdd,0x03,0x05,0x00,0x00] flat_atomic_umax_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_umax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x61,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_umax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x9d,0xdd,0x03,0x05,0x00,0x01] flat_atomic_and_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_and_x2 v[3:4], v[5:6] ; encoding: 
[0x00,0x00,0x64,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_and_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xa0,0xdd,0x03,0x05,0x00,0x00] flat_atomic_and_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_and_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x65,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_and_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xa1,0xdd,0x03,0x05,0x00,0x01] flat_atomic_or_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_or_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x68,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_or_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xa4,0xdd,0x03,0x05,0x00,0x00] flat_atomic_or_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_or_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x69,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_or_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xa5,0xdd,0x03,0x05,0x00,0x01] flat_atomic_xor_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_xor_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x6c,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_xor_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xa8,0xdd,0x03,0x05,0x00,0x00] flat_atomic_xor_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_xor_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x6d,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_xor_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xa9,0xdd,0x03,0x05,0x00,0x01] flat_atomic_inc_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_inc_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x70,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_inc_x2 v[3:4], v[5:6] ; encoding: 
[0x00,0x00,0xac,0xdd,0x03,0x05,0x00,0x00] flat_atomic_inc_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_inc_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x71,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_inc_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xad,0xdd,0x03,0x05,0x00,0x01] flat_atomic_dec_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_dec_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x74,0xdd,0x03,0x05,0x00,0x00] // VI: flat_atomic_dec_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xb0,0xdd,0x03,0x05,0x00,0x00] flat_atomic_dec_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_dec_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x75,0xdd,0x03,0x05,0x00,0x01] // VI: flat_atomic_dec_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xb1,0xdd,0x03,0x05,0x00,0x01] flat_atomic_fcmpswap_x2 v[3:4], v[5:8] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fcmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x78,0xdd,0x03,0x05,0x00,0x00] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fcmpswap_x2 v[1:2], v[3:4], v[5:8] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fcmpswap_x2 v[1:2], v[3:4], v[5:8] glc ; encoding: [0x00,0x00,0x79,0xdd,0x03,0x05,0x00,0x01] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fmin_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fmin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x7c,0xdd,0x03,0x05,0x00,0x00] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fmin_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: 
flat_atomic_fmin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x7d,0xdd,0x03,0x05,0x00,0x01] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fmax_x2 v[3:4], v[5:6] -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fmax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x80,0xdd,0x03,0x05,0x00,0x00] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU flat_atomic_fmax_x2 v[1:2], v[3:4], v[5:6] glc -// NOSI: error: +// NOSI: error: instruction not supported on this GPU // CI: flat_atomic_fmax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x81,0xdd,0x03,0x05,0x00,0x01] -// NOVI: error: +// NOVI: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/fma-mix.s b/llvm/test/MC/AMDGPU/fma-mix.s index 9d4c762bef557..3f510090ee58e 100644 --- a/llvm/test/MC/AMDGPU/fma-mix.s +++ b/llvm/test/MC/AMDGPU/fma-mix.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx904 -show-encoding %s | FileCheck -check-prefix=GFX9-FMAMIX %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s | FileCheck -check-prefix=GFX9-FMAMIX %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9-MADMIX-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9-MADMIX-ERR --implicit-check-not=error: %s v_fma_mix_f32 v0, v1, v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] @@ -20,45 +20,57 @@ v_fma_mixhi_f16 v0, v1, v2, v3 v_fma_mix_f32 v0, abs(v1), v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. 
-// FIXME: Better error -// GFX9-MADMIX-ERR: error: invalid operand for instruction +// FIXME: Improve error messages v_fma_mix_f32 v0, v1, abs(v2), v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, abs(v3) // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, -v1, v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, -v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, -v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, -abs(v1), v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, -abs(v2), v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, -abs(v3) // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3) // GFX9-FMAMIX: v_fma_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mixhi_f16 v0, -v1, abs(v2), -abs(v3) // GFX9-FMAMIX: v_fma_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4] +// GFX9-MADMIX-ERR: error: not a valid operand. 
v_fma_mixlo_f16 v0, v1, v2, v3 clamp // GFX9-FMAMIX: v_fma_mixlo_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa1,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: invalid operand for instruction v_fma_mixhi_f16 v0, v1, v2, v3 clamp // GFX9-FMAMIX: v_fma_mixhi_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa2,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: invalid operand for instruction // // op_sel with non-packed instructions @@ -66,38 +78,50 @@ v_fma_mixhi_f16 v0, v1, v2, v3 clamp v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// FIXME: Better error -// GFX-MADMIX-ERR: error: unknown token in expression +// GFX9-MADMIX-ERR: error: not a valid operand. + +// FIXME: Improve error messages v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x10,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x00,0x38,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. 
v_fma_mix_f32 v0, v1, v2, v3 // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x14] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] // GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x1c] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-FMAMIX: v_fma_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa1,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-MADMIX-ERR: error: not a valid operand. v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-FMAMIX: v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa2,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-MADMIX-ERR: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/gfx10-constant-bus.s b/llvm/test/MC/AMDGPU/gfx10-constant-bus.s index d2034ae1354b1..37e853c87be79 100644 --- a/llvm/test/MC/AMDGPU/gfx10-constant-bus.s +++ b/llvm/test/MC/AMDGPU/gfx10-constant-bus.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s //----------------------------------------------------------------------------------------- // On GFX10 we can use two scalar operands (except for 64-bit shift instructions) diff --git a/llvm/test/MC/AMDGPU/gfx1011_err.s b/llvm/test/MC/AMDGPU/gfx1011_err.s index e99716018c051..81c8c6254c037 100644 --- a/llvm/test/MC/AMDGPU/gfx1011_err.s +++ b/llvm/test/MC/AMDGPU/gfx1011_err.s @@ -1,50 +1,50 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1011 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX10 --implicit-check-not=error: %s v_dot8c_i32_i4 v5, v1, v2 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: error: +// GFX10: error: not a valid operand. v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX10: error: +// GFX10: error: not a valid operand. 
v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX10: error: +// GFX10: error: not a valid operand. v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX10: error: +// GFX10: error: not a valid operand. s_getreg_b32 s2, hwreg(HW_REG_SHADER_CYCLES) -// GFX10: error: +// GFX10: error: specified hardware register is not supported on this GPU v_fma_legacy_f32 v0, v1, v2, v3 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] -// GFX10: error: +// GFX10: error: invalid instruction image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 -// GFX10: error: +// GFX10: error: invalid instruction image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] -// GFX10: error: +// GFX10: error: invalid instruction image_bvh64_intersect_ray v[4:7], v[9:24], s[4:7] a16 -// GFX10: error: +// GFX10: error: invalid instruction image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D -// GFX10: error: +// GFX10: error: not a valid operand. image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D glc -// GFX10: error: +// GFX10: error: not a valid operand. image_msaa_load v5, v[1:2], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_2D d16 -// GFX10: error: +// GFX10: error: not a valid operand. image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D -// GFX10: error: +// GFX10: error: not a valid operand. image_msaa_load v14, [v204,v11,v14,v19], s[40:47] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY -// GFX10: error: +// GFX10: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s index 29d906ec838be..b8e1afdfdb5b0 100644 --- a/llvm/test/MC/AMDGPU/gfx1030_err.s +++ b/llvm/test/MC/AMDGPU/gfx1030_err.s @@ -1,140 +1,140 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1031 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck --check-prefix=GFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1031 %s 2>&1 | FileCheck --check-prefix=GFX10 --implicit-check-not=error: %s v_dot8c_i32_i4 v5, v1, v2 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: error: +// GFX10: error: not a valid operand. v_dot8c_i32_i4 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX10: error: +// GFX10: error: not a valid operand. v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX10: error: +// GFX10: error: not a valid operand. v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX10: error: +// GFX10: error: not a valid operand. 
s_get_waveid_in_workgroup s0 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU s_memtime s[0:1] -// GFX10: error: +// GFX10: error: instruction not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) -// GFX10: error: +// GFX10: error: specified hardware register is not supported on this GPU v_mac_f32 v0, v1, v2 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_mad_f32 v0, v1, v2, v3 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_madak_f32 v0, v1, v2, 1 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_madmk_f32 v0, v1, 1, v2 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_mad_legacy_f32 v0, v1, v2, v3 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU v_mac_legacy_f32 v0, v1, v2 -// GFX10: error: +// GFX10: error: instruction not supported on this GPU ds_add_src2_u32 v1 offset:65535 gds -// GFX10: error: +// GFX10: error: not a valid operand. ds_add_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_add_src2_f32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_sub_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_rsub_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_inc_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_dec_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_i32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_i32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_u32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. 
ds_and_src2_b32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_or_src2_b32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_xor_src2_b32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_f32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_f32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_add_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_sub_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_rsub_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_inc_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_dec_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_i64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_i64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_u64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_and_src2_b64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_or_src2_b64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_xor_src2_b64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_min_src2_f64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_max_src2_f64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_write_src2_b32 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. ds_write_src2_b64 v1 offset:65535 -// GFX10: error: +// GFX10: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_all.s b/llvm/test/MC/AMDGPU/gfx10_asm_all.s index d1bbde6539417..59c49220111ce 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_all.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // ENC_DS. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s index ce3cef52e8994..01159c365ebc2 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s v_mov_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // GFX10: [0xfa,0x02,0x0a,0x7e,0x01,0x1b,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s index b8ede28ec0763..e6985532bd1a1 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s 2>&1 | FileCheck 
--check-prefixes=GFX10-ERR,W32-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W64-ERR --implicit-check-not=error: %s v_mov_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] // GFX10: encoding: [0xe9,0x02,0x0a,0x7e,0x01,0x88,0xc6,0xfa] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_err.s index 251dde827b71c..978ec345f2b05 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_err.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_err.s @@ -1,9 +1,9 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx601 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX6-7,GFX6-8,GFX6-9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx701 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX6-7,GFX6-8,GFX6-9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx801 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX6-8,GFX6-9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX6-9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx601 %s 2>&1 | FileCheck --check-prefixes=GFX6-7,GFX6-8,GFX6-9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx701 %s 2>&1 | FileCheck --check-prefixes=GFX6-7,GFX6-8,GFX6-9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx801 %s 2>&1 | FileCheck 
--check-prefixes=GFX6-8,GFX6-9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefixes=GFX6-9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GFX10 --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // ENC_DS. @@ -124,6 +124,7 @@ s_bitreplicate_b64_b32 s[0:1], s2 s_set_gpr_idx_idx s0 // GFX10: error: instruction not supported on this GPU +// GFX6-7: error: instruction not supported on this GPU // GFX6, GFX7, GFX8, GFX9. @@ -167,6 +168,7 @@ s_pack_hh_b32_b16 s0, s1, s2 s_rfe_restore_b64 s[0:1], s2 // GFX10: error: instruction not supported on this GPU +// GFX6-7: error: instruction not supported on this GPU // GFX6, GFX7, GFX8, GFX9. 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s index 3a33ed4b8a608..f99a295362369 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_mimg_err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=NOGFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOGFX10 --implicit-check-not=error: %s ; TODO: more helpful error message for missing dim operand image_load v[0:3], v0, s[0:7] dmask:0xf unorm diff --git a/llvm/test/MC/AMDGPU/gfx8_asm_all.s b/llvm/test/MC/AMDGPU/gfx8_asm_all.s index 1610bfa7d92a9..1362a4f871b2d 100644 --- a/llvm/test/MC/AMDGPU/gfx8_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx8_asm_all.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=CHECK-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=CHECK-ERR --implicit-check-not=error: %s ds_add_u32 v1, v2 offset:65535 // CHECK: [0xff,0xff,0x00,0xd8,0x01,0x02,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx9-asm-err.s b/llvm/test/MC/AMDGPU/gfx9-asm-err.s index ff63cdf2a35a7..de0930cf952a2 100644 --- a/llvm/test/MC/AMDGPU/gfx9-asm-err.s +++ b/llvm/test/MC/AMDGPU/gfx9-asm-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9ERR --implicit-check-not=error: %s v_cvt_f16_u16_e64 v5, 0.5 // GFX9ERR: error: invalid literal operand diff --git a/llvm/test/MC/AMDGPU/gfx9-vop2be-literal.s b/llvm/test/MC/AMDGPU/gfx9-vop2be-literal.s index f996c4e3c0a7e..d1c7a759385a9 100644 --- a/llvm/test/MC/AMDGPU/gfx9-vop2be-literal.s +++ b/llvm/test/MC/AMDGPU/gfx9-vop2be-literal.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -arch=amdgcn 
-mcpu=gfx900 -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX9-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9-ERR --implicit-check-not=error: %s v_addc_co_u32_e32 v3, vcc, 12345, v3, vcc // GFX9-ERR: error: invalid operand (violates constant bus restrictions) diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_all.s b/llvm/test/MC/AMDGPU/gfx9_asm_all.s index b3b8bf86a131b..93050e4c292b6 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_all.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_all.s @@ -1,6 +1,6 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=+d16-preserves-unused-bits -show-encoding %s | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=+d16-preserves-unused-bits %s 2>&1 | FileCheck -check-prefix=CHECK-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=+d16-preserves-unused-bits %s 2>&1 | FileCheck -check-prefix=CHECK-ERR --implicit-check-not=error: %s ds_add_u32 v1, v2 offset:65535 // CHECK: [0xff,0xff,0x00,0xd8,0x01,0x02,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s index 546ed25a60ebc..14800de71cbd2 100644 --- a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s +++ b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s @@ -10,10 +10,6 @@ v2, v4, v6 # CHECK-NEXT: v2, v4, v6 # CHECK-NEXT: ^ -# CHECK: error: failed parsing operand -# CHECK-NEXT: v2, v4, v6 -# CHECK-NEXT: ^ - # We don't want to see a suggestion here; the edit distance is too large to # give sensible suggestions: diff --git a/llvm/test/MC/AMDGPU/lds_direct-err.s b/llvm/test/MC/AMDGPU/lds_direct-err.s index 578461bc35ad5..3e5bfe48ca0a0 100644 --- a/llvm/test/MC/AMDGPU/lds_direct-err.s +++ b/llvm/test/MC/AMDGPU/lds_direct-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 +// RUN: not llvm-mc 
-arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: //---------------------------------------------------------------------------// // lds_direct may be used only with vector ALU instructions diff --git a/llvm/test/MC/AMDGPU/lds_direct-gfx10.s b/llvm/test/MC/AMDGPU/lds_direct-gfx10.s index f3174553e5793..61e4de3e46914 100644 --- a/llvm/test/MC/AMDGPU/lds_direct-gfx10.s +++ b/llvm/test/MC/AMDGPU/lds_direct-gfx10.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX10 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck %s --check-prefix=NOGFX10 --implicit-check-not=error: v_readfirstlane_b32 s0, lds_direct // GFX10: v_readfirstlane_b32 s0, src_lds_direct ; encoding: [0xfe,0x04,0x00,0x7e] diff --git a/llvm/test/MC/AMDGPU/literal16-err.s b/llvm/test/MC/AMDGPU/literal16-err.s index f9b3d40f84d94..6a2f1eb23a474 100644 --- a/llvm/test/MC/AMDGPU/literal16-err.s +++ b/llvm/test/MC/AMDGPU/literal16-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s v_add_f16 v1, 0xfffff, v2 // NOVI: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index dd9d2903a1ae2..b666b7d1cb780 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -4,11 +4,11 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=GFX89 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=GFX89 --check-prefix=GFX9 -// RUN: not llvm-mc -arch=amdgcn 
-show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSI --check-prefix=NOSICI --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSI --check-prefix=NOSICI --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSICI --check-prefix=NOCIVI --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSICIVI --check-prefix=NOVI --check-prefix=NOGFX89 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOGFX89 --check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSI --check-prefix=NOSICI --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSI --check-prefix=NOSICI --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSICI --check-prefix=NOCIVI --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOSICIVI --check-prefix=NOVI --check-prefix=NOGFX89 --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGCN --check-prefix=NOGFX89 --check-prefix=NOGFX9 --implicit-check-not=error: //---------------------------------------------------------------------------// // fp literal, expected fp operand @@ -640,132 +640,133 @@ v_ceil_f32_sdwa v5, |execz| dst_sel:DWORD src0_sel:DWORD // named inline values: shared_base, shared_limit, private_base, etc 
//---------------------------------------------------------------------------// -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb] buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81] s_add_i32 s0, src_shared_base, s0 -// NOSICIVI: error: failed parsing operand. + + + + + + +// NOSICIVI: error: not a valid operand // GFX9: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81] s_add_i32 s0, src_shared_limit, s0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81] s_add_i32 s0, src_private_base, s0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81] s_add_i32 s0, src_private_limit, s0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_add_i32 s0, src_pops_exiting_wave_id, s0 ; encoding: [0xef,0x00,0x00,0x81] s_add_i32 s0, src_pops_exiting_wave_id, s0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_shared_base -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_shared_limit -// NOSICIVI: error: failed parsing operand. 
+// NOSICIVI: error: not a valid operand // GFX9: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_private_base -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_private_limit -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_add_u16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4c] v_add_u16 v0, src_shared_base, v0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06] v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86] v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_add_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x68] v_add_u32 v0, src_shared_base, v0 -// NOSICIVI: error: failed parsing operand. 
+// NOSICIVI: error: not a valid operand // GFX9: v_add_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00] v_add_u32_e64 v0, src_shared_base, v0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d] v_cmp_eq_i64 vcc, src_shared_base, v[0:1] -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a] v_max_f16 v0, src_shared_base, v0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x16] v_max_f32 v0, src_shared_base, v0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00] v_max_f64 v[0:1], src_shared_base, v[0:1] -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x8f,0xd3,0xeb,0x00,0x02,0x18] v_pk_add_f16 v0, src_shared_base, v0 -// NOSICI: error: not a valid operand -// NOVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20] v_ceil_f16 v0, neg(src_shared_base) -// NOSICI: error: not a valid operand -// NOVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00] v_ceil_f16 v0, abs(src_shared_base) -// NOSOCIVI: error: failed parsing operand. 
+// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00] v_ceil_f64 v[5:6], |src_shared_base| -// NOSI: error: not a valid operand -// NOCIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20] v_ceil_f64 v[5:6], -src_shared_base -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x5d,0xd1,0xeb,0x00,0x00,0x20] v_ceil_f32 v0, -src_shared_base -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x5d,0xd1,0xeb,0x00,0x00,0x00] v_ceil_f32 v0, |src_shared_base| -// NOSICI: error: not a valid operand. -// NOVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE -// NOSICI: error: not a valid operand. -// NOVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00] v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00] v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD src0_sel:DWORD -// NOSICIVI: error: failed parsing operand. 
+// NOSICIVI: error: not a valid operand // GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD @@ -773,7 +774,7 @@ v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD // named inline values compete with other scalars for constant bus access //---------------------------------------------------------------------------// -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_add_u32 v0, private_base, s0 @@ -782,17 +783,17 @@ v_add_u32 v0, private_base, s0 v_add_u32 v0, scc, s0 // v_div_fmas implicitly reads VCC -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, shared_base, v0, v1 // v_div_fmas implicitly reads VCC -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, v0, shared_limit, v1 // v_div_fmas implicitly reads VCC -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, v0, v1, private_limit @@ -809,29 +810,29 @@ v_div_fmas_f32 v0, v0, scc, v1 v_div_fmas_f32 v0, v0, v1, vccz // v_addc_co_u32 implicitly reads VCC (VOP2) -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_addc_co_u32 v0, vcc, shared_base, v0, vcc -// NOSICIVI: error: failed parsing operand. 
+// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_madak_f32 v0, shared_base, v0, 0x11213141 // NOGCN: error: invalid operand (violates constant bus restrictions) v_madak_f32 v0, scc, v0, 0x11213141 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], private_base, private_limit -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], private_base, s0 // NOGCN: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], execz, s0 -// NOSICIVI: error: failed parsing operand. +// NOSICIVI: error: not a valid operand // NOGFX9: error: invalid operand (violates constant bus restrictions) v_pk_add_f16 v255, private_base, private_limit diff --git a/llvm/test/MC/AMDGPU/literalv216-err.s b/llvm/test/MC/AMDGPU/literalv216-err.s index 3a1c49b136fd2..eefe1b343c3a1 100644 --- a/llvm/test/MC/AMDGPU/literalv216-err.s +++ b/llvm/test/MC/AMDGPU/literalv216-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10 --implicit-check-not=error: %s v_pk_add_f16 v1, -17, v2 // GFX9: error: invalid literal operand @@ -38,12 +38,9 @@ v_pk_mad_i16 v5, 0x3c00, 0x4000, 2 v_pk_mad_i16 v5, 0x3c00, 3, 2 // GFX9: error: invalid literal operand -// GFX10-NOT: error: v_pk_mad_i16 v5, 3, 0x3c00, 2 // GFX9: error: invalid literal operand -// GFX10-NOT: error: v_pk_mad_i16 v5, 3, 2, 0x3c00 // GFX9: error: 
invalid literal operand -// GFX10-NOT: error: diff --git a/llvm/test/MC/AMDGPU/literalv216.s b/llvm/test/MC/AMDGPU/literalv216.s index 9bcc1341774a0..ac05c280f0490 100644 --- a/llvm/test/MC/AMDGPU/literalv216.s +++ b/llvm/test/MC/AMDGPU/literalv216.s @@ -1,8 +1,8 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOGFX9 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOGFX10 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s -check-prefix=NOGFX9 --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck %s -check-prefix=NOGFX10 --implicit-check-not=error: //===----------------------------------------------------------------------===// // Inline constants diff --git a/llvm/test/MC/AMDGPU/mad-mix.s b/llvm/test/MC/AMDGPU/mad-mix.s index 539de050f4d75..0a261a922725d 100644 --- a/llvm/test/MC/AMDGPU/mad-mix.s +++ b/llvm/test/MC/AMDGPU/mad-mix.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9-MADMIX %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx904 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9-FMAMIX-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9-FMAMIX-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx904 %s 2>&1 | FileCheck -check-prefix=GFX9-FMAMIX-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck -check-prefix=GFX9-FMAMIX-ERR --implicit-check-not=error: %s v_mad_mix_f32 v0, v1, v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] @@ -20,45 +20,57 @@ v_mad_mixhi_f16 v0, v1, v2, v3 
v_mad_mix_f32 v0, abs(v1), v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. -// FIXME: Better error -// GFX9-FMAMIX-ERR: error: invalid operand for instruction +// FIXME: Improve diagnostics v_mad_mix_f32 v0, v1, abs(v2), v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, abs(v3) // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, -v1, v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, -v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, -v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, -abs(v1), v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, -abs(v2), v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, -abs(v3) // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3) // GFX9-MADMIX: v_mad_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44] +// GFX9-FMAMIX-ERR: error: not a valid operand. 
v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3) // GFX9-MADMIX: v_mad_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mixlo_f16 v0, v1, v2, v3 clamp // GFX9-MADMIX: v_mad_mixlo_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa1,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: invalid operand for instruction v_mad_mixhi_f16 v0, v1, v2, v3 clamp // GFX9-MADMIX: v_mad_mixhi_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa2,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: invalid operand for instruction // // op_sel with non-packed instructions @@ -66,38 +78,50 @@ v_mad_mixhi_f16 v0, v1, v2, v3 clamp v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] -// FIXME: Better error -// GFX-FMAMIX-ERR: error: unknown token in expression +// GFX9-FMAMIX-ERR: error: not a valid operand. + +// FIXME: Improve diagnostics v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x10,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x00,0x38,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. 
v_mad_mix_f32 v0, v1, v2, v3 // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x14] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x04] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] // GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x1c] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-MADMIX: v_mad_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa1,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-FMAMIX-ERR: error: not a valid operand. v_mad_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp // GFX9-MADMIX: v_mad_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa2,0xd3,0x01,0x05,0x0e,0x0c] +// GFX9-FMAMIX-ERR: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/mai-err.s b/llvm/test/MC/AMDGPU/mai-err.s index 9b9b733428e48..6f3361c0c9f3d 100644 --- a/llvm/test/MC/AMDGPU/mai-err.s +++ b/llvm/test/MC/AMDGPU/mai-err.s @@ -1,527 +1,700 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck -check-prefix=GFX908 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX900 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck -check-prefix=GFX908 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX900 --implicit-check-not=error: %s v_accvgpr_read_b32 v0, v0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_read_b32 a0, a0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_read_b32 v0, 1 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_read_b32 v0, s0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_read_b32 v0, a0 // GFX900: error: instruction not supported on this GPU v_accvgpr_write_b32 v0, v0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_write_b32 a0, a0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_write_b32 a0, s0 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_write_b32 a0, 65 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_accvgpr_write_b32 a0, v0 // GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 v[0:31], v0, v1, a[1:32] // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_mfma_f32_32x32x1f32 a[0:31], v0, v1, 
v[1:32] // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_mfma_f32_32x32x1f32 a[0:31], s0, v1, a[1:32] // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_mfma_f32_32x32x1f32 a[0:31], 1, v1, a[1:32] // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_mfma_f32_32x32x1f32 a[0:31], v0, v1, 65 // GFX908: error: invalid operand for instruction +// GFX900: error: invalid operand for instruction v_mfma_f32_32x32x1f32 a[0:31], v0, v1, 0 // GFX900: error: instruction not supported on this GPU +// GFX908: error: invalid literal operand v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. 
v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 // GFX908: error: invalid literal operand +// GFX900: error: instruction not supported on this GPU v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 // GFX908: error: invalid literal operand +// GFX900: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/mai.s b/llvm/test/MC/AMDGPU/mai.s index 09eddb0d258c8..c02139a616fc4 100644 --- a/llvm/test/MC/AMDGPU/mai.s +++ b/llvm/test/MC/AMDGPU/mai.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck -check-prefix=GFX908 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s 2>&1 | FileCheck -check-prefix=NOGFX908 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck -check-prefix=NOGFX908 --implicit-check-not=error: %s v_accvgpr_read_b32 v2, a0 // GFX908: v_accvgpr_read_b32 v2, a0 ; encoding: [0x02,0x00,0xd8,0xd3,0x00,0x01,0x00,0x08] diff --git a/llvm/test/MC/AMDGPU/mimg-err.s b/llvm/test/MC/AMDGPU/mimg-err.s index 822ffdd65351e..9c8a9c8abf643 100644 --- a/llvm/test/MC/AMDGPU/mimg-err.s +++ b/llvm/test/MC/AMDGPU/mimg-err.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: //===----------------------------------------------------------------------===// // Image Load/Store diff --git a/llvm/test/MC/AMDGPU/mimg.s b/llvm/test/MC/AMDGPU/mimg.s index 83835270a1d4a..403ee7d62cc01 100644 --- 
a/llvm/test/MC/AMDGPU/mimg.s +++ b/llvm/test/MC/AMDGPU/mimg.s @@ -5,12 +5,12 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_1 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=GFX89 -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_0 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_1 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_0 --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_1 --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: //===----------------------------------------------------------------------===// // Image Load/Store @@ -201,7 +201,7 @@ image_store v[5:8], v[1:2], s[8:15] dmask:0xf unorm a16 
// NOSICI: error: a16 modifier is not supported on this GPU // NOVI: error: a16 modifier is not supported on this GPU -/===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // Image Load/Store: a16 & d16 //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/mtbuf-gfx10.s b/llvm/test/MC/AMDGPU/mtbuf-gfx10.s index 8ea86e7de9657..2fdad57b19295 100644 --- a/llvm/test/MC/AMDGPU/mtbuf-gfx10.s +++ b/llvm/test/MC/AMDGPU/mtbuf-gfx10.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Positive tests for legacy format syntax. 
diff --git a/llvm/test/MC/AMDGPU/mtbuf.s b/llvm/test/MC/AMDGPU/mtbuf.s index f7fdd29bb83b8..0653b591d69d7 100644 --- a/llvm/test/MC/AMDGPU/mtbuf.s +++ b/llvm/test/MC/AMDGPU/mtbuf.s @@ -2,9 +2,9 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICI-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICI-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,VI-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,VI-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Positive tests for legacy dfmt/nfmt syntax. 
diff --git a/llvm/test/MC/AMDGPU/mubuf-gfx9.s b/llvm/test/MC/AMDGPU/mubuf-gfx9.s index d9c3fc39cfd8d..10909c63aff7a 100644 --- a/llvm/test/MC/AMDGPU/mubuf-gfx9.s +++ b/llvm/test/MC/AMDGPU/mubuf-gfx9.s @@ -1,5 +1,5 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR --implicit-check-not=error: %s buffer_load_ubyte_d16 v1, off, s[4:7], s1 // VI-ERR: error: instruction not supported on this GPU @@ -39,23 +39,23 @@ buffer_load_format_d16_hi_x v5, off, s[8:11], s3 buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 // GFX9: buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 idxen offset:4095 // GFX9: buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 offen offset:4095 // GFX9: buffer_load_format_d16_hi_x v5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 glc // GFX9: buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x98,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error +// VI-ERR: error: not a valid operand. 
buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 slc // GFX9: buffer_load_format_d16_hi_x v5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x9a,0xe0,0x00,0x05,0x02,0x03] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_store_format_d16_hi_x v255, off, s[12:15], s4 // GFX9: buffer_store_format_d16_hi_x v255, off, s[12:15], s4 ; encoding: [0x00,0x00,0x9c,0xe0,0x00,0xff,0x03,0x04] @@ -63,20 +63,20 @@ buffer_store_format_d16_hi_x v255, off, s[12:15], s4 buffer_store_format_d16_hi_x v255, off, s[12:15], s4 offset:4095 // GFX9: buffer_store_format_d16_hi_x v255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe0,0x00,0xff,0x03,0x04] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 idxen offset:4095 // GFX9: buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x9c,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 offen offset:4095 // GFX9: buffer_store_format_d16_hi_x v1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x9c,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 glc // GFX9: buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x9c,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error +// VI-ERR: error: not a valid operand. buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 slc // GFX9: buffer_store_format_d16_hi_x v1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x9e,0xe0,0x00,0x01,0x03,0x04] -// VI-ERR: error +// VI-ERR: error: not a valid operand. 
diff --git a/llvm/test/MC/AMDGPU/mubuf.s b/llvm/test/MC/AMDGPU/mubuf.s index 6c0fdb1408282..a07a0a2aab180 100644 --- a/llvm/test/MC/AMDGPU/mubuf.s +++ b/llvm/test/MC/AMDGPU/mubuf.s @@ -2,9 +2,9 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI -check-prefix=NOSICIVI -check-prefix=NOSICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOCI -check-prefix=NOSICIVI -check-prefix=NOSICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI -check-prefix=NOSICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI -check-prefix=NOSICIVI -check-prefix=NOSICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOCI -check-prefix=NOSICIVI -check-prefix=NOSICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI -check-prefix=NOSICIVI --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Test for different operand combinations diff --git a/llvm/test/MC/AMDGPU/out-of-range-registers.s b/llvm/test/MC/AMDGPU/out-of-range-registers.s index 53e0f65f0cb15..c7cd03470f9fc 100644 --- a/llvm/test/MC/AMDGPU/out-of-range-registers.s +++ b/llvm/test/MC/AMDGPU/out-of-range-registers.s @@ -1,12 +1,12 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,SI-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,CIVI9-ERR %s -// RUN: not llvm-mc -arch=amdgcn 
-mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX9-ERR,SICIVI9-ERR,CIVI9-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,SI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,SICIVI9-ERR,SIVICI-ERR,CIVI9-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX9-ERR,SICIVI9-ERR,CIVI9-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX10-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=SIVICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=SIVICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=SIVICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=SIVICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s s_add_i32 s106, s0, s1 // GCN-ERR: error: not a valid operand @@ -84,21 +84,25 @@ s_mov_b32 ttmp12, 0 // SICIVI: error: not a valid operand // GFX9: s_mov_b32 ttmp12, 0 ; encoding: // GFX10: s_mov_b32 ttmp12, 0 ; encoding: +// SIVICI-ERR: error: not a valid operand. 
s_mov_b32 ttmp15, 0 // SICIVI: error: not a valid operand // GFX9: s_mov_b32 ttmp15, 0 ; encoding: // GFX10: s_mov_b32 ttmp15, 0 ; encoding: +// SIVICI-ERR: error: not a valid operand. s_mov_b32 flat_scratch_lo, 0 // SI-ERR: error: not a valid operand // CIVI9: s_mov_b32 flat_scratch_lo, 0 ; encoding: // GFX10-ERR: error: not a valid operand +// GFX9: s_mov_b32 flat_scratch_lo, 0 ; encoding: [0x80,0x00,0xe6,0xbe] s_mov_b32 flat_scratch_hi, 0 // SI-ERR: error: not a valid operand // CIVI9: s_mov_b32 flat_scratch_hi, 0 ; encoding: // GFX10-ERR: error: not a valid operand +// GFX9: s_mov_b32 flat_scratch_hi, 0 ; encoding: [0x80,0x00,0xe7,0xbe] s_mov_b32 tma_lo, 0 // SIVICI: s_mov_b32 tma_lo, 0 ; encoding: diff --git a/llvm/test/MC/AMDGPU/reg-syntax-err.s b/llvm/test/MC/AMDGPU/reg-syntax-err.s index 8d58630ce8885..dce9375a47111 100644 --- a/llvm/test/MC/AMDGPU/reg-syntax-err.s +++ b/llvm/test/MC/AMDGPU/reg-syntax-err.s @@ -1,73 +1,73 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s s_mov_b32 s1, s 1 // NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s1, s[0 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[0:0 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s[0 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s[0:1] 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s0, 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
s_mov_b32 s1, s999 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[1:2] 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[0:2] 1 // NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s1, xnack_mask_lo 1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s s0 // NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s1, s[0 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[0:0 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s[0 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s[0:1] s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, [s0, s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s999 s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[1:2] s0 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[0:2] vcc_lo // NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s1, xnack_mask_lo s1 -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
exp mrt0 v1, v2, v3, v4000 off -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. v_add_f64 v[0:1], v[0:1], v[0xF00000001:0x2] -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. v_add_f64 v[0:1], v[0:1], v[0x1:0xF00000002] -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. s_mov_b32 s1, s[0:-1] -// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand +// NOVI: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/reg-syntax-extra.s b/llvm/test/MC/AMDGPU/reg-syntax-extra.s index 4e8216c88d67c..528247f562399 100644 --- a/llvm/test/MC/AMDGPU/reg-syntax-extra.s +++ b/llvm/test/MC/AMDGPU/reg-syntax-extra.s @@ -1,48 +1,61 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICI --check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s + +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=NOSICI --check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=NOGCN --check-prefix=NOVI 
--implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=NOGCN --check-prefix=NOGFX10 --implicit-check-not=error: %s s_mov_b32 [ttmp5], [ttmp3] // SICI: s_mov_b32 ttmp5, ttmp3 ; encoding: [0x73,0x03,0xf5,0xbe] // VI: s_mov_b32 ttmp5, ttmp3 ; encoding: [0x73,0x00,0xf5,0xbe] +// GFX10: s_mov_b32 ttmp5, ttmp3 ; encoding: [0x6f,0x03,0xf1,0xbe] s_mov_b64 [ttmp4,ttmp5], [ttmp2,ttmp3] // SICI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x04,0xf4,0xbe] // VI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x01,0xf4,0xbe] +// GFX10: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x6e,0x04,0xf0,0xbe] s_mov_b64 ttmp[4:5], ttmp[2:3] // SICI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x04,0xf4,0xbe] // VI: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x72,0x01,0xf4,0xbe] +// GFX10: s_mov_b64 ttmp[4:5], ttmp[2:3] ; encoding: [0x6e,0x04,0xf0,0xbe] s_mov_b64 [s6,s7], s[8:9] // SICI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x04,0x86,0xbe] // VI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x01,0x86,0xbe] +// GFX10: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x04,0x86,0xbe] s_mov_b64 s[6:7], [s8,s9] // SICI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x04,0x86,0xbe] // VI: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x01,0x86,0xbe] +// GFX10: s_mov_b64 s[6:7], s[8:9] ; encoding: [0x08,0x04,0x86,0xbe] s_mov_b64 [exec_lo,exec_hi], s[2:3] // SICI: s_mov_b64 exec, s[2:3] ; encoding: [0x02,0x04,0xfe,0xbe] // VI: s_mov_b64 exec, s[2:3] ; encoding: [0x02,0x01,0xfe,0xbe] +// GFX10: s_mov_b64 exec, s[2:3] ; encoding: [0x02,0x04,0xfe,0xbe] s_mov_b64 [flat_scratch_lo,flat_scratch_hi], s[2:3] -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: s_mov_b64 flat_scratch, s[2:3] ; encoding: [0x02,0x01,0xe6,0xbe] +// NOGFX10: error: not a valid operand. 
s_mov_b64 [vcc_lo,vcc_hi], s[2:3] // SICI: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x04,0xea,0xbe] // VI: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x01,0xea,0xbe] +// GFX10: s_mov_b64 vcc, s[2:3] ; encoding: [0x02,0x04,0xea,0xbe] s_mov_b64 [tba_lo,tba_hi], s[2:3] // SICI: s_mov_b64 tba, s[2:3] ; encoding: [0x02,0x04,0xec,0xbe] // VI: s_mov_b64 tba, s[2:3] ; encoding: [0x02,0x01,0xec,0xbe] +// NOGFX10: error: not a valid operand. s_mov_b64 [tma_lo,tma_hi], s[2:3] // SICI: s_mov_b64 tma, s[2:3] ; encoding: [0x02,0x04,0xee,0xbe] // VI: s_mov_b64 tma, s[2:3] ; encoding: [0x02,0x01,0xee,0xbe] +// NOGFX10: error: not a valid operand. v_mov_b32_e32 [v1], [v2] // GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e] @@ -50,80 +63,109 @@ v_mov_b32_e32 [v1], [v2] v_rcp_f64 [v1,v2], [v2,v3] // SICI: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e] // VI: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x4b,0x02,0x7e] +// GFX10: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e] buffer_load_dwordx4 [v1,v2,v3,v4], off, [s4,s5,s6,s7], s1 // SICI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01] // VI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01] +// GFX10: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01] buffer_load_dword v1, off, [ttmp4,ttmp5,ttmp6,ttmp7], s1 // SICI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x01] // VI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x01] +// GFX10: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1c,0x01] buffer_store_format_xyzw v[1:4], off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1 // SICI: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71] // VI: buffer_store_format_xyzw v[1:4], off, 
ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71] +// GFX10: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1c,0x6d] buffer_load_ubyte v1, off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1 // SICI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1d,0x71] // VI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x1d,0x71] +// GFX10: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1c,0x6d] buffer_store_dwordx4 v[1:4], off, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp1 // SICI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1d,0x71] // VI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x1d,0x71] +// GFX10: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1c,0x6d] s_load_dwordx4 [ttmp4,ttmp5,ttmp6,ttmp7], [ttmp2,ttmp3], ttmp4 // SICI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x74,0x72,0xba,0xc0] // VI: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x39,0x1d,0x08,0xc0,0x74,0x00,0x00,0x00] +// GFX10: s_load_dwordx4 ttmp[4:7], ttmp[2:3], ttmp4 ; encoding: [0x37,0x1c,0x08,0xf4,0x00,0x00,0x00,0xe0] s_buffer_load_dword ttmp1, [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4 // SICI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x74,0xf4,0x38,0xc2] // VI: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x7a,0x1c,0x20,0xc0,0x74,0x00,0x00,0x00] +// GFX10: s_buffer_load_dword ttmp1, ttmp[4:7], ttmp4 ; encoding: [0x78,0x1b,0x20,0xf4,0x00,0x00,0x00,0xe0] s_buffer_load_dwordx4 [ttmp8,ttmp9,ttmp10,ttmp11], [ttmp4,ttmp5,ttmp6,ttmp7], ttmp4 // SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2] // VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00] +// 
GFX10: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x38,0x1d,0x28,0xf4,0x00,0x00,0x00,0xe0] s_buffer_load_dwordx4 [ttmp[8],ttmp[8+1],ttmp[5*2],ttmp[(3+2)*2+1]], ttmp[45/11:(33+45)/11], ttmp4 // SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2] // VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00] +// GFX10: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x38,0x1d,0x28,0xf4,0x00,0x00,0x00,0xe0] s_buffer_load_dwordx4 ttmp[7+1:(3+2)*2+1], [ttmp[45/11],ttmp[5],ttmp6,ttmp[(33+45)/11]], ttmp4 // SICI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x74,0x74,0xbc,0xc2] // VI: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x3a,0x1e,0x28,0xc0,0x74,0x00,0x00,0x00] +// GFX10: s_buffer_load_dwordx4 ttmp[8:11], ttmp[4:7], ttmp4 ; encoding: [0x38,0x1d,0x28,0xf4,0x00,0x00,0x00,0xe0] flat_load_dword v[8:8], v[2:3] -// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x30,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU flat_load_dword v[63/8+1:65/8], v[2:3] -// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x30,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU flat_load_dword v8, v[2*2-2:(3+7)/3] -// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x30,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: 
instruction not supported on this GPU flat_load_dword v[63/8+1], v[2:3] -// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// VI: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dword v8, v[2:3] ; encoding: [0x00,0x00,0x30,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU flat_load_dwordx4 v[8:11], v[2*2-2:(3*3-6)] // VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x38,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU flat_load_dwordx4 v[8/2+4:11/2+6], v[2:3] // VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x38,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU flat_load_dwordx4 [v[8/2+4],v9,v[10],v[11/2+6]], v[2:3] // VI: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x00,0x08] +// GFX10: flat_load_dwordx4 v[8:11], v[2:3] ; encoding: [0x00,0x00,0x38,0xdc,0x02,0x00,0x7d,0x08] +// NOSICI: error: instruction not supported on this GPU v_mul_f32 v0, null, v2 -// NOSICIVI: error: -// GFX10: v_mul_f32_e32 v0, null, v2 ; encoding: [0x7d,0x04,0x00,0x10] +// NOSICIVI: error: not a valid operand. +// GFX10: v_mul_f32_e32 v0, null, v2 ; encoding: [0x7d,0x04,0x00,0x10] +// NOVI: error: not a valid operand. v_mul_f64 v[0:1], null, null -// NOSICIVI: error: -// GFX10: v_mul_f64 v[0:1], null, null ; encoding: [0x00,0x00,0x65,0xd5,0x7d,0xfa,0x00,0x00] +// NOSICIVI: error: not a valid operand. +// GFX10: v_mul_f64 v[0:1], null, null ; encoding: [0x00,0x00,0x65,0xd5,0x7d,0xfa,0x00,0x00] +// NOVI: error: not a valid operand. 
s_add_u32 null, null, null -// NOSICIVI: error: -// GFX10: s_add_u32 null, null, null ; encoding: [0x7d,0x7d,0x7d,0x80] +// NOSICIVI: error: not a valid operand. +// GFX10: s_add_u32 null, null, null ; encoding: [0x7d,0x7d,0x7d,0x80] +// NOVI: error: not a valid operand. s_not_b64 s[2:3], null -// NOSICIVI: error: -// GFX10: s_not_b64 s[2:3], null ; encoding: [0x7d,0x08,0x82,0xbe] +// NOSICIVI: error: not a valid operand. +// GFX10: s_not_b64 s[2:3], null ; encoding: [0x7d,0x08,0x82,0xbe] +// NOVI: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/regression/bug28538.s b/llvm/test/MC/AMDGPU/regression/bug28538.s index 59fac226343d8..f9cdb157bbb11 100644 --- a/llvm/test/MC/AMDGPU/regression/bug28538.s +++ b/llvm/test/MC/AMDGPU/regression/bug28538.s @@ -1,12 +1,12 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOCIVI --check-prefix=NOVI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOCIVI --check-prefix=NOVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: -// NOSICI: error: +// NOSICI: error: not a valid operand. // NOVI: error: failed parsing operand v_mov_b32 v0, v0 row_bcast:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// NOVI: error: failed parsing operand v_mov_b32 v0, v0 row_bcast:13 diff --git a/llvm/test/MC/AMDGPU/smem-err.s b/llvm/test/MC/AMDGPU/smem-err.s index 83cfeb81b6eef..5f62318a1ac7b 100644 --- a/llvm/test/MC/AMDGPU/smem-err.s +++ b/llvm/test/MC/AMDGPU/smem-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s s_memtime exec // NOVI: :11: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s index dcff79fef529e..4d81929b415e0 100644 --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -3,12 +3,12 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX1012 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI -check-prefix=NOSICIVIGFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX10 
-check-prefix=NOGFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=NOSICI -check-prefix=NOSICIVI -check-prefix=NOSICIGFX10 -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI -check-prefix=NOSICIVIGFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck -check-prefix=NOSICIGFX10 -check-prefix=NOGFX9 --implicit-check-not=error: %s s_dcache_wb // GFX89: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] @@ -105,14 +105,17 @@ s_store_dword tma_hi, s[2:3], s4 s_load_dword s1, s[2:3], 0xfc glc // GFX89: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x03,0xc0,0xfc,0x00,0x00,0x00] // GFX10: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x01,0xf4,0xfc,0x00,0x00,0xfa] +// SICI: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0xfc,0x83,0x00,0xc0] s_load_dword s1, s[2:3], s4 glc // GFX89: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xc0,0x04,0x00,0x00,0x00] // GFX10: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xf4,0x00,0x00,0x00,0x08] +// SICI: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x04,0x82,0x00,0xc0] s_buffer_store_dword s10, s[92:95], m0 // GFX89: s_buffer_store_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU +// 
GFX10: s_buffer_store_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x60,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dword tba_lo, s[92:95], m0 // VI: s_buffer_store_dword tba_lo, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] @@ -138,14 +141,17 @@ s_buffer_store_dword ttmp0, s[92:95], m0 // VI: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x60,0xc0,0x7c,0x00,0x00,0x00] // GFX9: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU +// GFX10: s_buffer_store_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1b,0x60,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dwordx2 s[10:11], s[92:95], m0 // GFX89: s_buffer_store_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x64,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU +// GFX10: s_buffer_store_dwordx2 s[10:11], s[92:95], m0 ; encoding: [0xae,0x02,0x64,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc // GFX89: s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x69,0xc0,0x7c,0x00,0x00,0x00] // NOSICI: error: invalid operand for instruction +// GFX10: s_buffer_store_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x69,0xf4,0x00,0x00,0x00,0xf8] s_buffer_store_dwordx2 tba, s[92:95], m0 glc // VI: s_buffer_store_dwordx2 tba, s[92:95], m0 glc ; encoding: [0x2e,0x1b,0x65,0xc0,0x7c,0x00,0x00,0x00] @@ -154,6 +160,8 @@ s_buffer_store_dwordx2 tba, s[92:95], m0 glc s_buffer_load_dword s10, s[92:95], m0 // GFX89: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x20,0xc0,0x7c,0x00,0x00,0x00] +// SICI: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0x7c,0x5c,0x05,0xc2] +// GFX10: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0xae,0x02,0x20,0xf4,0x00,0x00,0x00,0xf8] // SICIGFX10: s_buffer_load_dword s10, s[92:95], m0 ; encoding: [0x7c,0x5c,0x05,0xc2] s_buffer_load_dword tba_lo, s[92:95], m0 @@ 
-207,6 +215,7 @@ s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc // GFX89: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xc0,0x7c,0x00,0x00,0x00] // GFX10: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x2e,0x02,0x29,0xf4,0x00,0x00,0x00,0xf8] +// SICI: s_buffer_load_dwordx4 s[8:11], s[92:95], m0 glc ; encoding: [0x7c,0x5c,0x84,0xc2] //===----------------------------------------------------------------------===// // s_scratch instructions @@ -220,7 +229,7 @@ s_scratch_load_dword s5, s[2:3], s101 s_scratch_load_dword s5, s[2:3], s0 glc // GFX9: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x15,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVI: error +// NOSICIVI: error: invalid operand for instruction s_scratch_load_dwordx2 s[100:101], s[2:3], s0 // GFX9: s_scratch_load_dwordx2 s[100:101], s[2:3], s0 ; encoding: [0x01,0x19,0x18,0xc0,0x00,0x00,0x00,0x00] @@ -230,7 +239,7 @@ s_scratch_load_dwordx2 s[100:101], s[2:3], s0 s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc // GFX9: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x1b,0xc0,0x01,0x00,0x00,0x00] // GFX1012: s_scratch_load_dwordx2 s[10:11], s[2:3], 0x1 glc ; encoding: [0x81,0x02,0x19,0xf4,0x01,0x00,0x00,0xfa] -// NOSICIVI: error +// NOSICIVI: error: invalid operand for instruction s_scratch_load_dwordx4 s[20:23], s[4:5], s0 // GFX9: s_scratch_load_dwordx4 s[20:23], s[4:5], s0 ; encoding: [0x02,0x05,0x1c,0xc0,0x00,0x00,0x00,0x00] @@ -245,17 +254,17 @@ s_scratch_store_dword s101, s[4:5], s0 s_scratch_store_dword s1, s[4:5], 0x123 glc // GFX9: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x57,0xc0,0x23,0x01,0x00,0x00] // GFX1012: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa] -// NOSICIVI: error +// NOSICIVI: error: 
invalid operand for instruction s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc // GFX9: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xc0,0x65,0x00,0x00,0x00] // GFX1012: s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc ; encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca] -// NOSICIVI: error +// NOSICIVI: error: invalid operand for instruction s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc // GFX9: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xc0,0x00,0x00,0x00,0x00] // GFX1012: s_scratch_store_dwordx4 s[4:7], s[4:5], s0 glc ; encoding: [0x02,0x01,0x5d,0xf4,0x00,0x00,0x00,0x00] -// NOSICIVI: error +// NOSICIVI: error: invalid operand for instruction //===----------------------------------------------------------------------===// // s_dcache_discard instructions @@ -288,162 +297,162 @@ s_dcache_discard_x2 s[2:3], 0x0 s_atomic_add s5, s[2:3], s101 // GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_add s5, s[2:3], 0x0 // GFX9: s_atomic_add s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x0a,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_add s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_add s5, s[2:3], s0 glc // GFX9: s_atomic_add s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x09,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_add s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x09,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_add_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: 
[0x81,0x02,0x88,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_and s5, s[2:3], s101 // GFX9: s_atomic_and s5, s[2:3], s101 ; encoding: [0x41,0x01,0x20,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_and s5, s[2:3], s101 ; encoding: [0x41,0x01,0x20,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_and_x2 s[10:11], s[2:3], 0x0 // GFX9: s_atomic_and_x2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0xa2,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_and_x2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0xa0,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap s[10:11], s[2:3], s101 // GFX9: s_atomic_cmpswap s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x04,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap s[10:11], s[2:3], 0x0 // GFX9: s_atomic_cmpswap s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x06,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x04,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap s[10:11], s[2:3], s0 glc // GFX9: s_atomic_cmpswap s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x84,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], s101 ; encoding: [0x01,0x05,0x84,0xf6,0x00,0x00,0x00,0xca] -// 
NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x86,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x84,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc // GFX9: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x85,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_cmpswap_x2 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x85,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_dec s5, s[2:3], s0 glc // GFX9: s_atomic_dec s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x31,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_dec s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x31,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_dec_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_dec_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xb0,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_dec_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xb0,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_inc s5, s[2:3], s0 glc // GFX9: s_atomic_inc s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x2d,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_inc s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x2d,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_inc_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_inc_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xac,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_inc_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0xac,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not 
supported on this GPU s_atomic_or s5, s[2:3], 0x0 // GFX9: s_atomic_or s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x26,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_or s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x24,0xf6,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_or_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_or_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa5,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_or_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa5,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_smax s5, s[2:3], s101 // GFX9: s_atomic_smax s5, s[2:3], s101 ; encoding: [0x41,0x01,0x18,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_smax s5, s[2:3], s101 ; encoding: [0x41,0x01,0x18,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_smax_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_smax_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x99,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_smax_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x99,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_smin s5, s[2:3], s101 // GFX9: s_atomic_smin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x10,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_smin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x10,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_smin_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x91,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_smin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x91,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_sub s5, s[2:3], s101 // GFX9: s_atomic_sub s5, s[2:3], s101 ; encoding: 
[0x41,0x01,0x0c,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_sub s5, s[2:3], s101 ; encoding: [0x41,0x01,0x0c,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_sub_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x8d,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_sub_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x8d,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_swap s5, s[2:3], s101 // GFX9: s_atomic_swap s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_swap s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_swap_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x81,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_swap_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x81,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_umax s5, s[2:3], s0 glc // GFX9: s_atomic_umax s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x1d,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_umax s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x1d,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_umax_x2 s[10:11], s[2:3], s101 // GFX9: s_atomic_umax_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x9c,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_umax_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x9c,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_umin s5, s[2:3], s101 // GFX9: s_atomic_umin s5, s[2:3], s101 ; encoding: [0x41,0x01,0x14,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_umin s5, s[2:3], s101 ; 
encoding: [0x41,0x01,0x14,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_umin_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x95,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_umin_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x95,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_xor s5, s[2:3], s101 // GFX9: s_atomic_xor s5, s[2:3], s101 ; encoding: [0x41,0x01,0x28,0xc2,0x65,0x00,0x00,0x00] // GFX1012: s_atomic_xor s5, s[2:3], s101 ; encoding: [0x41,0x01,0x28,0xf6,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_atomic_xor_x2 s[10:11], s[2:3], s0 glc // GFX9: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa9,0xc2,0x00,0x00,0x00,0x00] // GFX1012: s_atomic_xor_x2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0xa9,0xf6,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// // s_buffer_atomic instructions @@ -452,162 +461,162 @@ s_atomic_xor_x2 s[10:11], s[2:3], s0 glc s_buffer_atomic_add s5, s[4:7], s101 // GFX9: s_buffer_atomic_add s5, s[4:7], s101 ; encoding: [0x42,0x01,0x08,0xc1,0x65,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add s5, s[4:7], s101 ; encoding: [0x42,0x01,0x08,0xf5,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_add s5, s[4:7], 0x0 // GFX9: s_buffer_atomic_add s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x0a,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x08,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_add s5, s[4:7], s0 glc // GFX9: s_buffer_atomic_add s5, s[4:7], s0 glc 
; encoding: [0x42,0x01,0x09,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x09,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x88,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_add_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x88,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_and s101, s[4:7], s0 // GFX9: s_buffer_atomic_and s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_and s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 // GFX9: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0xa0,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_and_x2 s[10:11], s[8:11], s0 ; encoding: [0x84,0x02,0xa0,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x04,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x04,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 // GFX9: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x06,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x04,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc // GFX9: 
s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x05,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x05,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x84,0xc1,0x65,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s101 ; encoding: [0x02,0x05,0x84,0xf5,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x86,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x84,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc // GFX9: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x85,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_cmpswap_x2 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x85,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_dec s5, s[4:7], s0 // GFX9: s_buffer_atomic_dec s5, s[4:7], s0 ; encoding: [0x42,0x01,0x30,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_dec s5, s[4:7], s0 ; encoding: [0x42,0x01,0x30,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xb1,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_dec_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xb1,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: 
error: instruction not supported on this GPU s_buffer_atomic_inc s101, s[4:7], s0 // GFX9: s_buffer_atomic_inc s101, s[4:7], s0 ; encoding: [0x42,0x19,0x2c,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_inc s101, s[4:7], s0 ; encoding: [0x42,0x19,0x2c,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 // GFX9: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0xae,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_inc_x2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0xac,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_or s5, s[8:11], s0 // GFX9: s_buffer_atomic_or s5, s[8:11], s0 ; encoding: [0x44,0x01,0x24,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_or s5, s[8:11], s0 ; encoding: [0x44,0x01,0x24,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 // GFX9: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0xa4,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_or_x2 s[10:11], s[96:99], s0 ; encoding: [0xb0,0x02,0xa4,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_smax s5, s[4:7], s101 // GFX9: s_buffer_atomic_smax s5, s[4:7], s101 ; encoding: [0x42,0x01,0x18,0xc1,0x65,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smax s5, s[4:7], s101 ; encoding: [0x42,0x01,0x18,0xf5,0x00,0x00,0x00,0xca] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 // GFX9: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x98,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smax_x2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x98,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: 
error: instruction not supported on this GPU s_buffer_atomic_smin s5, s[4:7], 0x0 // GFX9: s_buffer_atomic_smin s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x12,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smin s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x10,0xf5,0x00,0x00,0x00,0xfa] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 // GFX9: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x90,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_smin_x2 s[12:13], s[4:7], s0 ; encoding: [0x02,0x03,0x90,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_sub s5, s[4:7], s0 glc // GFX9: s_buffer_atomic_sub s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x0d,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_sub s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x0d,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 // GFX9: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x8c,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_sub_x2 s[10:11], s[4:7], s0 ; encoding: [0x82,0x02,0x8c,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_swap s5, s[4:7], s0 // GFX9: s_buffer_atomic_swap s5, s[4:7], s0 ; encoding: [0x42,0x01,0x00,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_swap s5, s[4:7], s0 ; encoding: [0x42,0x01,0x00,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x81,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_swap_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x81,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// 
NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_umax s5, s[4:7], s0 // GFX9: s_buffer_atomic_umax s5, s[4:7], s0 ; encoding: [0x42,0x01,0x1c,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umax s5, s[4:7], s0 ; encoding: [0x42,0x01,0x1c,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x9d,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umax_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x9d,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_umin s5, s[4:7], s0 // GFX9: s_buffer_atomic_umin s5, s[4:7], s0 ; encoding: [0x42,0x01,0x14,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umin s5, s[4:7], s0 ; encoding: [0x42,0x01,0x14,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x95,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_umin_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x95,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_xor s5, s[4:7], s0 // GFX9: s_buffer_atomic_xor s5, s[4:7], s0 ; encoding: [0x42,0x01,0x28,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_xor s5, s[4:7], s0 ; encoding: [0x42,0x01,0x28,0xf5,0x00,0x00,0x00,0x00] -// NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc // GFX9: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xc1,0x00,0x00,0x00,0x00] // GFX1012: s_buffer_atomic_xor_x2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0xa9,0xf5,0x00,0x00,0x00,0x00] -// 
NOSICIVI: error: +// NOSICIVI: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// // Unsigned 20-bit offsets (VI+) diff --git a/llvm/test/MC/AMDGPU/smrd-err.s b/llvm/test/MC/AMDGPU/smrd-err.s index d7ef74901c6f0..68f2ac6570c90 100644 --- a/llvm/test/MC/AMDGPU/smrd-err.s +++ b/llvm/test/MC/AMDGPU/smrd-err.s @@ -1,15 +1,14 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=NOVI --implicit-check-not=error: %s s_load_dwordx4 s[100:103], s[2:3], s4 -// VI: error: not a valid operand +// NOVI: error: not a valid operand // SI: s_load_dwordx4 s[100:103], s[2:3], s4 - s_load_dwordx8 s[96:103], s[2:3], s4 -// VI: error: not a valid operand +// NOVI: error: not a valid operand // SI: s_load_dwordx8 s[96:103], s[2:3], s4 s_load_dwordx16 s[88:103], s[2:3], s4 -// VI: error: not a valid operand +// NOVI: error: not a valid operand // SI: s_load_dwordx16 s[88:103], s[2:3], s4 diff --git a/llvm/test/MC/AMDGPU/smrd.s b/llvm/test/MC/AMDGPU/smrd.s index 0ad3b0f20645e..30f01b2ced1c3 100644 --- a/llvm/test/MC/AMDGPU/smrd.s +++ b/llvm/test/MC/AMDGPU/smrd.s @@ -3,9 +3,9 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=CI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 
| FileCheck %s --check-prefix=NOSI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Offset Handling diff --git a/llvm/test/MC/AMDGPU/sop1-err.s b/llvm/test/MC/AMDGPU/sop1-err.s index 0225fa1778ead..6322f5b098c35 100644 --- a/llvm/test/MC/AMDGPU/sop1-err.s +++ b/llvm/test/MC/AMDGPU/sop1-err.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI --implicit-check-not=error: %s s_mov_b32 v1, s2 // GCN: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/sop1.s b/llvm/test/MC/AMDGPU/sop1.s index 76525b943cad1..dafbf650b6715 100644 --- a/llvm/test/MC/AMDGPU/sop1.s +++ b/llvm/test/MC/AMDGPU/sop1.s @@ -1,71 +1,84 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=GFX89 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=GFX89 --check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck 
--check-prefix=GCN --check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=GFX89 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX89 --check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICI --check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=NOVI --check-prefix=NOSICIVI --check-prefix=NOGFX89 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck --check-prefix=NOGFX89 %s - -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding 2>&1 %s | FileCheck --check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --check-prefix=NOSICI --check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=NOVI --check-prefix=NOSICIVI --check-prefix=NOGFX89 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --check-prefix=NOGFX89 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 2>&1 %s | FileCheck --check-prefix=GFX10-ERR --implicit-check-not=error: %s s_mov_b32 s1, s2 // SICI: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe] // GFX89: s_mov_b32 s1, s2 ; encoding: [0x02,0x00,0x81,0xbe] +// GFX10: s_mov_b32 s1, s2 ; encoding: [0x02,0x03,0x81,0xbe] s_mov_b32 s1, 1 // SICI: s_mov_b32 s1, 1 ; encoding: [0x81,0x03,0x81,0xbe] // GFX89: s_mov_b32 s1, 1 ; encoding: [0x81,0x00,0x81,0xbe] +// GFX10: s_mov_b32 s1, 1 ; encoding: [0x81,0x03,0x81,0xbe] s_mov_b32 s1, 100 // SICI: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00] // GFX89: 
s_mov_b32 s1, 0x64 ; encoding: [0xff,0x00,0x81,0xbe,0x64,0x00,0x00,0x00] +// GFX10: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00] // Literal constant sign bit s_mov_b32 s1, 0x80000000 // SICI: s_mov_b32 s1, 0x80000000 ; encoding: [0xff,0x03,0x81,0xbe,0x00,0x00,0x00,0x80] // GFX89: s_mov_b32 s1, 0x80000000 ; encoding: [0xff,0x00,0x81,0xbe,0x00,0x00,0x00,0x80] +// GFX10: s_mov_b32 s1, 0x80000000 ; encoding: [0xff,0x03,0x81,0xbe,0x00,0x00,0x00,0x80] // Negative 32-bit constant s_mov_b32 s0, 0xfe5163ab // SICI: s_mov_b32 s0, 0xfe5163ab ; encoding: [0xff,0x03,0x80,0xbe,0xab,0x63,0x51,0xfe] // GFX89: s_mov_b32 s0, 0xfe5163ab ; encoding: [0xff,0x00,0x80,0xbe,0xab,0x63,0x51,0xfe] +// GFX10: s_mov_b32 s0, 0xfe5163ab ; encoding: [0xff,0x03,0x80,0xbe,0xab,0x63,0x51,0xfe] s_mov_b64 s[2:3], s[4:5] // SICI: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe] // GFX89: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x01,0x82,0xbe] +// GFX10: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe] s_mov_b64 null, s[4:5] // GFX10: s_mov_b64 null, s[4:5] ; encoding: [0x04,0x04,0xfd,0xbe] // NOSICIVI: error: not a valid operand. +// NOGFX9: error: not a valid operand. 
s_mov_b64 s[2:3], 0xffffffffffffffff // SICI: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe] // GFX89: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x01,0x82,0xbe] +// GFX10: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe] s_mov_b64 s[2:3], 0xffffffff // SICI: s_mov_b64 s[2:3], 0xffffffff ; encoding: [0xff,0x04,0x82,0xbe,0xff,0xff,0xff,0xff] // GFX89: s_mov_b64 s[2:3], 0xffffffff ; encoding: [0xff,0x01,0x82,0xbe,0xff,0xff,0xff,0xff] +// GFX10: s_mov_b64 s[2:3], 0xffffffff ; encoding: [0xff,0x04,0x82,0xbe,0xff,0xff,0xff,0xff] s_mov_b64 s[0:1], 0x80000000 // SICI: s_mov_b64 s[0:1], 0x80000000 ; encoding: [0xff,0x04,0x80,0xbe,0x00,0x00,0x00,0x80] // GFX89: s_mov_b64 s[0:1], 0x80000000 ; encoding: [0xff,0x01,0x80,0xbe,0x00,0x00,0x00,0x80] +// GFX10: s_mov_b64 s[0:1], 0x80000000 ; encoding: [0xff,0x04,0x80,0xbe,0x00,0x00,0x00,0x80] s_mov_b64 s[102:103], -1 // SICI: s_mov_b64 s[102:103], -1 ; encoding: [0xc1,0x04,0xe6,0xbe] // NOGFX89: error: not a valid operand +// GFX10: s_mov_b64 s[102:103], -1 ; encoding: [0xc1,0x04,0xe6,0xbe] s_cmov_b32 s1, 200 // SICI: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x05,0x81,0xbe,0xc8,0x00,0x00,0x00] // GFX89: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x02,0x81,0xbe,0xc8,0x00,0x00,0x00] +// GFX10: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x05,0x81,0xbe,0xc8,0x00,0x00,0x00] s_cmov_b32 s1, 1.0 // SICI: s_cmov_b32 s1, 1.0 ; encoding: [0xf2,0x05,0x81,0xbe] // GFX89: s_cmov_b32 s1, 1.0 ; encoding: [0xf2,0x02,0x81,0xbe] +// GFX10: s_cmov_b32 s1, 1.0 ; encoding: [0xf2,0x05,0x81,0xbe] s_cmov_b32 s1, s2 // SICI: s_cmov_b32 s1, s2 ; encoding: [0x02,0x05,0x81,0xbe] // GFX89: s_cmov_b32 s1, s2 ; encoding: [0x02,0x02,0x81,0xbe] +// GFX10: s_cmov_b32 s1, s2 ; encoding: [0x02,0x05,0x81,0xbe] //s_cmov_b64 s[2:3], 1.0 //GCN-FIXME: s_cmov_b64 s[2:3], 1.0 ; encoding: [0xf2,0x05,0x82,0xb3] @@ -73,174 +86,217 @@ s_cmov_b32 s1, s2 s_cmov_b64 s[2:3], s[4:5] // SICI: s_cmov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x06,0x82,0xbe] // GFX89: s_cmov_b64 s[2:3], s[4:5] 
; encoding: [0x04,0x03,0x82,0xbe] +// GFX10: s_cmov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x06,0x82,0xbe] s_not_b32 s1, s2 // SICI: s_not_b32 s1, s2 ; encoding: [0x02,0x07,0x81,0xbe] // GFX89: s_not_b32 s1, s2 ; encoding: [0x02,0x04,0x81,0xbe] +// GFX10: s_not_b32 s1, s2 ; encoding: [0x02,0x07,0x81,0xbe] s_not_b64 s[2:3], s[4:5] // SICI: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x08,0x82,0xbe] // GFX89: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x05,0x82,0xbe] +// GFX10: s_not_b64 s[2:3], s[4:5] ; encoding: [0x04,0x08,0x82,0xbe] s_wqm_b32 s1, s2 // SICI: s_wqm_b32 s1, s2 ; encoding: [0x02,0x09,0x81,0xbe] // GFX89: s_wqm_b32 s1, s2 ; encoding: [0x02,0x06,0x81,0xbe] +// GFX10: s_wqm_b32 s1, s2 ; encoding: [0x02,0x09,0x81,0xbe] s_wqm_b64 s[2:3], s[4:5] // SICI: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0a,0x82,0xbe] // GFX89: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x07,0x82,0xbe] +// GFX10: s_wqm_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0a,0x82,0xbe] s_brev_b32 s1, s2 // SICI: s_brev_b32 s1, s2 ; encoding: [0x02,0x0b,0x81,0xbe] // GFX89: s_brev_b32 s1, s2 ; encoding: [0x02,0x08,0x81,0xbe] +// GFX10: s_brev_b32 s1, s2 ; encoding: [0x02,0x0b,0x81,0xbe] s_brev_b64 s[2:3], s[4:5] // SICI: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0c,0x82,0xbe] // GFX89: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x09,0x82,0xbe] +// GFX10: s_brev_b64 s[2:3], s[4:5] ; encoding: [0x04,0x0c,0x82,0xbe] s_bcnt0_i32_b32 s1, s2 // SICI: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0d,0x81,0xbe] // GFX89: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0a,0x81,0xbe] +// GFX10: s_bcnt0_i32_b32 s1, s2 ; encoding: [0x02,0x0d,0x81,0xbe] s_bcnt0_i32_b64 s1, s[2:3] // SICI: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0e,0x81,0xbe] // GFX89: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0b,0x81,0xbe] +// GFX10: s_bcnt0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0e,0x81,0xbe] s_bcnt1_i32_b32 s1, s2 // SICI: s_bcnt1_i32_b32 s1, s2 ; encoding: [0x02,0x0f,0x81,0xbe] // GFX89: s_bcnt1_i32_b32 s1, s2 ; 
encoding: [0x02,0x0c,0x81,0xbe] +// GFX10: s_bcnt1_i32_b32 s1, s2 ; encoding: [0x02,0x0f,0x81,0xbe] s_bcnt1_i32_b64 s1, s[2:3] // SICI: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x10,0x81,0xbe] // GFX89: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0d,0x81,0xbe] +// GFX10: s_bcnt1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x10,0x81,0xbe] s_ff0_i32_b32 s1, s2 // SICI: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x11,0x81,0xbe] // GFX89: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x0e,0x81,0xbe] +// GFX10: s_ff0_i32_b32 s1, s2 ; encoding: [0x02,0x11,0x81,0xbe] s_ff0_i32_b64 s1, s[2:3] // SICI: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x12,0x81,0xbe] // GFX89: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x0f,0x81,0xbe] +// GFX10: s_ff0_i32_b64 s1, s[2:3] ; encoding: [0x02,0x12,0x81,0xbe] s_ff1_i32_b32 s1, s2 // SICI: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x13,0x81,0xbe] // GFX89: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x10,0x81,0xbe] +// GFX10: s_ff1_i32_b32 s1, s2 ; encoding: [0x02,0x13,0x81,0xbe] s_ff1_i32_b64 s1, s[2:3] // SICI: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x14,0x81,0xbe] // GFX89: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x11,0x81,0xbe] +// GFX10: s_ff1_i32_b64 s1, s[2:3] ; encoding: [0x02,0x14,0x81,0xbe] s_flbit_i32_b32 s1, s2 // SICI: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x15,0x81,0xbe] // GFX89: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x12,0x81,0xbe] +// GFX10: s_flbit_i32_b32 s1, s2 ; encoding: [0x02,0x15,0x81,0xbe] s_flbit_i32_b64 s1, s[2:3] // SICI: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x16,0x81,0xbe] // GFX89: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x13,0x81,0xbe] +// GFX10: s_flbit_i32_b64 s1, s[2:3] ; encoding: [0x02,0x16,0x81,0xbe] s_flbit_i32 s1, s2 // SICI: s_flbit_i32 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe] // GFX89: s_flbit_i32 s1, s2 ; encoding: [0x02,0x14,0x81,0xbe] +// GFX10: s_flbit_i32 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe] s_flbit_i32_i64 s1, s[2:3] // SICI: s_flbit_i32_i64 s1, s[2:3] ; encoding: 
[0x02,0x18,0x81,0xbe] // GFX89: s_flbit_i32_i64 s1, s[2:3] ; encoding: [0x02,0x15,0x81,0xbe] +// GFX10: s_flbit_i32_i64 s1, s[2:3] ; encoding: [0x02,0x18,0x81,0xbe] s_sext_i32_i8 s1, s2 // SICI: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x19,0x81,0xbe] // GFX89: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x16,0x81,0xbe] +// GFX10: s_sext_i32_i8 s1, s2 ; encoding: [0x02,0x19,0x81,0xbe] s_sext_i32_i16 s1, s2 // SICI: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe] // GFX89: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x17,0x81,0xbe] +// GFX10: s_sext_i32_i16 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe] s_bitset0_b32 s1, s2 // SICI: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x1b,0x81,0xbe] // GFX89: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x18,0x81,0xbe] +// GFX10: s_bitset0_b32 s1, s2 ; encoding: [0x02,0x1b,0x81,0xbe] s_bitset0_b64 s[2:3], s4 // SICI: s_bitset0_b64 s[2:3], s4 ; encoding: [0x04,0x1c,0x82,0xbe] // GFX89: s_bitset0_b64 s[2:3], s4 ; encoding: [0x04,0x19,0x82,0xbe] +// GFX10: s_bitset0_b64 s[2:3], s4 ; encoding: [0x04,0x1c,0x82,0xbe] s_bitset1_b32 s1, s2 // SICI: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1d,0x81,0xbe] // GFX89: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1a,0x81,0xbe] +// GFX10: s_bitset1_b32 s1, s2 ; encoding: [0x02,0x1d,0x81,0xbe] s_bitset1_b64 s[2:3], s4 // SICI: s_bitset1_b64 s[2:3], s4 ; encoding: [0x04,0x1e,0x82,0xbe] // GFX89: s_bitset1_b64 s[2:3], s4 ; encoding: [0x04,0x1b,0x82,0xbe] +// GFX10: s_bitset1_b64 s[2:3], s4 ; encoding: [0x04,0x1e,0x82,0xbe] s_getpc_b64 s[2:3] // SICI: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1f,0x82,0xbe] // GFX89: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1c,0x82,0xbe] +// GFX10: s_getpc_b64 s[2:3] ; encoding: [0x00,0x1f,0x82,0xbe] s_setpc_b64 s[4:5] // SICI: s_setpc_b64 s[4:5] ; encoding: [0x04,0x20,0x80,0xbe] // GFX89: s_setpc_b64 s[4:5] ; encoding: [0x04,0x1d,0x80,0xbe] +// GFX10: s_setpc_b64 s[4:5] ; encoding: [0x04,0x20,0x80,0xbe] s_swappc_b64 s[2:3], s[4:5] // SICI: s_swappc_b64 s[2:3], s[4:5] ; encoding: 
[0x04,0x21,0x82,0xbe] // GFX89: s_swappc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x1e,0x82,0xbe] +// GFX10: s_swappc_b64 s[2:3], s[4:5] ; encoding: [0x04,0x21,0x82,0xbe] s_rfe_b64 s[4:5] // SICI: s_rfe_b64 s[4:5] ; encoding: [0x04,0x22,0x80,0xbe] // GFX89: s_rfe_b64 s[4:5] ; encoding: [0x04,0x1f,0x80,0xbe] +// GFX10: s_rfe_b64 s[4:5] ; encoding: [0x04,0x22,0x80,0xbe] s_and_saveexec_b64 s[2:3], s[4:5] // SICI: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe] // GFX89: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x20,0x82,0xbe] +// GFX10: s_and_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe] s_or_saveexec_b64 s[2:3], s[4:5] // SICI: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe] // GFX89: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x21,0x82,0xbe] +// GFX10: s_or_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe] s_xor_saveexec_b64 s[2:3], s[4:5] // SICI: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe] // GFX89: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x22,0x82,0xbe] +// GFX10: s_xor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe] s_andn2_saveexec_b64 s[2:3], s[4:5] // SICI: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe] // GFX89: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x23,0x82,0xbe] +// GFX10: s_andn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe] s_orn2_saveexec_b64 s[2:3], s[4:5] // SICI: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x28,0x82,0xbe] // GFX89: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x24,0x82,0xbe] +// GFX10: s_orn2_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x28,0x82,0xbe] s_nand_saveexec_b64 s[2:3], s[4:5] // SICI: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe] // GFX89: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x25,0x82,0xbe] +// GFX10: s_nand_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe] 
s_nor_saveexec_b64 s[2:3], s[4:5] // SICI: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2a,0x82,0xbe] // GFX89: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x26,0x82,0xbe] +// GFX10: s_nor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2a,0x82,0xbe] s_xnor_saveexec_b64 s[2:3], s[4:5] // SICI: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe] // GFX89: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x27,0x82,0xbe] +// GFX10: s_xnor_saveexec_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe] s_quadmask_b32 s1, s2 // SICI: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe] // GFX89: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x28,0x81,0xbe] +// GFX10: s_quadmask_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe] s_quadmask_b64 s[2:3], s[4:5] // SICI: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe] // GFX89: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x29,0x82,0xbe] +// GFX10: s_quadmask_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe] s_movrels_b32 s1, s2 // SICI: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2e,0x81,0xbe] // GFX89: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2a,0x81,0xbe] +// GFX10: s_movrels_b32 s1, s2 ; encoding: [0x02,0x2e,0x81,0xbe] s_movrels_b64 s[2:3], s[4:5] // SICI: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2f,0x82,0xbe] // GFX89: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2b,0x82,0xbe] +// GFX10: s_movrels_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2f,0x82,0xbe] s_movreld_b32 s1, s2 // SICI: s_movreld_b32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe] // GFX89: s_movreld_b32 s1, s2 ; encoding: [0x02,0x2c,0x81,0xbe] +// GFX10: s_movreld_b32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe] s_movreld_b64 s[2:3], s[4:5] // SICI: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x31,0x82,0xbe] // GFX89: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x2d,0x82,0xbe] +// GFX10: s_movreld_b64 s[2:3], s[4:5] ; encoding: [0x04,0x31,0x82,0xbe] s_cbranch_join s4 // SICI: s_cbranch_join s4 ; encoding: 
[0x04,0x32,0x80,0xbe] @@ -250,55 +306,69 @@ s_cbranch_join s4 s_cbranch_join 1 // NOSICI: error: invalid operand for instruction // NOGFX89: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_cbranch_join 100 // NOSICI: error: invalid operand for instruction // NOGFX89: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_abs_i32 s1, s2 // SICI: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe] // GFX89: s_abs_i32 s1, s2 ; encoding: [0x02,0x30,0x81,0xbe] +// GFX10: s_abs_i32 s1, s2 ; encoding: [0x02,0x34,0x81,0xbe] s_set_gpr_idx_idx s0 // GFX89: s_set_gpr_idx_idx s0 ; encoding: [0x00,0x32,0x80,0xbe] // NOSICI: error: instruction not supported on this GPU +// GFX10-ERR: error: instruction not supported on this GPU s_andn1_saveexec_b64 s[100:101], s[2:3] // GFX9: s_andn1_saveexec_b64 s[100:101], s[2:3] ; encoding: [0x02,0x33,0xe4,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn1_saveexec_b64 s[100:101], s[2:3] ; encoding: [0x02,0x37,0xe4,0xbe] s_andn1_saveexec_b64 s[10:11], s[4:5] // GFX9: s_andn1_saveexec_b64 s[10:11], s[4:5] ; encoding: [0x04,0x33,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn1_saveexec_b64 s[10:11], s[4:5] ; encoding: [0x04,0x37,0x8a,0xbe] s_andn1_saveexec_b64 s[10:11], -1 // GFX9: s_andn1_saveexec_b64 s[10:11], -1 ; encoding: [0xc1,0x33,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn1_saveexec_b64 s[10:11], -1 ; encoding: [0xc1,0x37,0x8a,0xbe] s_andn1_saveexec_b64 s[10:11], 0xaf123456 // GFX9: s_andn1_saveexec_b64 s[10:11], 0xaf123456 ; encoding: [0xff,0x33,0x8a,0xbe,0x56,0x34,0x12,0xaf] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn1_saveexec_b64 s[10:11], 0xaf123456 ; encoding: [0xff,0x37,0x8a,0xbe,0x56,0x34,0x12,0xaf] s_andn1_wrexec_b64 s[10:11], s[2:3] // GFX9: s_andn1_wrexec_b64 s[10:11], s[2:3] ; encoding: 
[0x02,0x35,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn1_wrexec_b64 s[10:11], s[2:3] ; encoding: [0x02,0x39,0x8a,0xbe] s_andn2_wrexec_b64 s[12:13], s[2:3] // GFX9: s_andn2_wrexec_b64 s[12:13], s[2:3] ; encoding: [0x02,0x36,0x8c,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_andn2_wrexec_b64 s[12:13], s[2:3] ; encoding: [0x02,0x3a,0x8c,0xbe] s_orn1_saveexec_b64 s[10:11], 0 // GFX9: s_orn1_saveexec_b64 s[10:11], 0 ; encoding: [0x80,0x34,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_orn1_saveexec_b64 s[10:11], 0 ; encoding: [0x80,0x38,0x8a,0xbe] s_bitreplicate_b64_b32 s[10:11], s101 // GFX9: s_bitreplicate_b64_b32 s[10:11], s101 ; encoding: [0x65,0x37,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_bitreplicate_b64_b32 s[10:11], s101 ; encoding: [0x65,0x3b,0x8a,0xbe] s_bitreplicate_b64_b32 s[10:11], -1 // GFX9: s_bitreplicate_b64_b32 s[10:11], -1 ; encoding: [0xc1,0x37,0x8a,0xbe] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_bitreplicate_b64_b32 s[10:11], -1 ; encoding: [0xc1,0x3b,0x8a,0xbe] s_bitreplicate_b64_b32 s[10:11], 0x3f717273 // GFX9: s_bitreplicate_b64_b32 s[10:11], 0x3f717273 ; encoding: [0xff,0x37,0x8a,0xbe,0x73,0x72,0x71,0x3f] // NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_bitreplicate_b64_b32 s[10:11], 0x3f717273 ; encoding: [0xff,0x3b,0x8a,0xbe,0x73,0x72,0x71,0x3f] diff --git a/llvm/test/MC/AMDGPU/sop2-err.s b/llvm/test/MC/AMDGPU/sop2-err.s index 128a3d7b33ceb..f6a6054ebdccc 100644 --- a/llvm/test/MC/AMDGPU/sop2-err.s +++ b/llvm/test/MC/AMDGPU/sop2-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN --implicit-check-not=error: %s s_cbranch_g_fork 100, s[6:7] // GCN: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/sop2.s 
b/llvm/test/MC/AMDGPU/sop2.s index c1fe19a787d01..89f41a7b3d512 100644 --- a/llvm/test/MC/AMDGPU/sop2.s +++ b/llvm/test/MC/AMDGPU/sop2.s @@ -5,13 +5,12 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX89 --check-prefix=GFX9 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=NOSICIVI --check-prefix=NOVI --check-prefix=NOGFX89 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck --check-prefix=NOGFX89 %s - -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding 2>&1 %s | FileCheck --check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=NOSICIVI --check-prefix=NOVI --check-prefix=NOGFX89 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --check-prefix=NOGFX89 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 2>&1 %s | FileCheck --check-prefix=GFX10-ERR --implicit-check-not=error: %s s_add_u32 s1, s2, s3 // GCN: s_add_u32 s1, s2, s3 ; encoding: [0x02,0x03,0x01,0x80] @@ -52,134 +51,167 @@ 
s_cselect_b64 s[2:3], s[4:5], s[6:7] s_and_b32 s2, s4, s6 // SICI: s_and_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x87] // GFX89: s_and_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x86] +// GFX10: s_and_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x87] s_and_b32 s2, 1234, 1234 // SICI: s_and_b32 s2, 0x4d2, 0x4d2 ; encoding: [0xff,0xff,0x02,0x87,0xd2,0x04,0x00,0x00] // GFX89: s_and_b32 s2, 0x4d2, 0x4d2 ; encoding: [0xff,0xff,0x02,0x86,0xd2,0x04,0x00,0x00] +// GFX10: s_and_b32 s2, 0x4d2, 0x4d2 ; encoding: [0xff,0xff,0x02,0x87,0xd2,0x04,0x00,0x00] s_and_b32 s2, 0xFFFF0000, -65536 // SICI: s_and_b32 s2, 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x02,0x87,0x00,0x00,0xff,0xff] // GFX89: s_and_b32 s2, 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x02,0x86,0x00,0x00,0xff,0xff] +// GFX10: s_and_b32 s2, 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x02,0x87,0x00,0x00,0xff,0xff] s_and_b64 null, s[4:5], s[6:7] // GFX10: s_and_b64 null, s[4:5], s[6:7] ; encoding: [0x04,0x06,0xfd,0x87] // NOSICIVI: error: not a valid operand. +// NOGFX9: error: not a valid operand. 
s_and_b64 s[2:3], s[4:5], s[6:7] // SICI: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87] // GFX89: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x86] +// GFX10: s_and_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87] s_or_b32 s2, s4, s6 // SICI: s_or_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x88] // GFX89: s_or_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x87] +// GFX10: s_or_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x88] s_or_b64 s[2:3], s[4:5], s[6:7] // SICI: s_or_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x88] // GFX89: s_or_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x87] +// GFX10: s_or_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x88] s_xor_b32 s2, s4, s6 // SICI: s_xor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x89] // GFX89: s_xor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x88] +// GFX10: s_xor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x89] s_xor_b64 s[2:3], s[4:5], s[6:7] // SICI: s_xor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x89] // GFX89: s_xor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x88] +// GFX10: s_xor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x89] s_andn2_b32 s2, s4, s6 // SICI: s_andn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8a] // GFX89: s_andn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x89] +// GFX10: s_andn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8a] s_andn2_b64 s[2:3], s[4:5], s[6:7] // SICI: s_andn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8a] // GFX89: s_andn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x89] +// GFX10: s_andn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8a] s_orn2_b32 s2, s4, s6 // SICI: s_orn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8b] // GFX89: s_orn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8a] +// GFX10: s_orn2_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8b] s_orn2_b64 s[2:3], s[4:5], s[6:7] // SICI: s_orn2_b64 s[2:3], 
s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8b] // GFX89: s_orn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8a] +// GFX10: s_orn2_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8b] s_nand_b32 s2, s4, s6 // SICI: s_nand_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8c] // GFX89: s_nand_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8b] +// GFX10: s_nand_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8c] s_nand_b64 s[2:3], s[4:5], s[6:7] // SICI: s_nand_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8c] // GFX89: s_nand_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8b] +// GFX10: s_nand_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8c] s_nor_b32 s2, s4, s6 // SICI: s_nor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8d] // GFX89: s_nor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8c] +// GFX10: s_nor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8d] s_nor_b64 s[2:3], s[4:5], s[6:7] // SICI: s_nor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8d] // GFX89: s_nor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8c] +// GFX10: s_nor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8d] s_xnor_b32 s2, s4, s6 // SICI: s_xnor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8e] // GFX89: s_xnor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8d] +// GFX10: s_xnor_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8e] s_xnor_b64 s[2:3], s[4:5], s[6:7] // SICI: s_xnor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8e] // GFX89: s_xnor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8d] +// GFX10: s_xnor_b64 s[2:3], s[4:5], s[6:7] ; encoding: [0x04,0x06,0x82,0x8e] s_lshl_b32 s2, s4, s6 // SICI: s_lshl_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8f] // GFX89: s_lshl_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8e] +// GFX10: s_lshl_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8f] s_lshl_b64 s[2:3], s[4:5], s6 // SICI: s_lshl_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8f] // GFX89: 
s_lshl_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8e] +// GFX10: s_lshl_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8f] s_lshr_b32 s2, s4, s6 // SICI: s_lshr_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x90] // GFX89: s_lshr_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x8f] +// GFX10: s_lshr_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x90] s_lshr_b64 s[2:3], s[4:5], s6 // SICI: s_lshr_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x90] // GFX89: s_lshr_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x8f] +// GFX10: s_lshr_b64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x90] s_ashr_i32 s2, s4, s6 // SICI: s_ashr_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x91] // GFX89: s_ashr_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x90] +// GFX10: s_ashr_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x91] s_ashr_i64 s[2:3], s[4:5], s6 // SICI: s_ashr_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x91] // GFX89: s_ashr_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x90] +// GFX10: s_ashr_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x91] s_ashr_i64 s[2:3], -65536, 0xFFFF0000 // SICI: s_ashr_i64 s[2:3], 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x82,0x91,0x00,0x00,0xff,0xff] // GFX89: s_ashr_i64 s[2:3], 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x82,0x90,0x00,0x00,0xff,0xff] +// GFX10: s_ashr_i64 s[2:3], 0xffff0000, 0xffff0000 ; encoding: [0xff,0xff,0x82,0x91,0x00,0x00,0xff,0xff] s_bfm_b32 s2, s4, s6 // SICI: s_bfm_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x92] // GFX89: s_bfm_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x91] +// GFX10: s_bfm_b32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x92] s_bfm_b64 s[2:3], s4, s6 // SICI: s_bfm_b64 s[2:3], s4, s6 ; encoding: [0x04,0x06,0x82,0x92] // GFX89: s_bfm_b64 s[2:3], s4, s6 ; encoding: [0x04,0x06,0x82,0x91] +// GFX10: s_bfm_b64 s[2:3], s4, s6 ; encoding: [0x04,0x06,0x82,0x92] s_mul_i32 s2, s4, s6 // SICI: s_mul_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x93] // GFX89: s_mul_i32 s2, s4, 
s6 ; encoding: [0x04,0x06,0x02,0x92] +// GFX10: s_mul_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x93] s_bfe_u32 s2, s4, s6 // SICI: s_bfe_u32 s2, s4, s6 ; encoding: [0x04,0x06,0x82,0x93] // GFX89: s_bfe_u32 s2, s4, s6 ; encoding: [0x04,0x06,0x82,0x92] +// GFX10: s_bfe_u32 s2, s4, s6 ; encoding: [0x04,0x06,0x82,0x93] s_bfe_i32 s2, s4, s6 // SICI: s_bfe_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x94] // GFX89: s_bfe_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x93] +// GFX10: s_bfe_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x94] s_bfe_u64 s[2:3], s[4:5], s6 // SICI: s_bfe_u64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x94] // GFX89: s_bfe_u64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x93] +// GFX10: s_bfe_u64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x82,0x94] s_bfe_i64 s[2:3], s[4:5], s6 // SICI: s_bfe_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x02,0x95] // GFX89: s_bfe_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x02,0x94] +// GFX10: s_bfe_i64 s[2:3], s[4:5], s6 ; encoding: [0x04,0x06,0x02,0x95] s_cbranch_g_fork s[4:5], s[6:7] // SICI: s_cbranch_g_fork s[4:5], s[6:7] ; encoding: [0x04,0x06,0x80,0x95] @@ -199,79 +231,99 @@ s_cbranch_g_fork s[6:7], 2 s_absdiff_i32 s2, s4, s6 // SICI: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x96] // GFX89: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x95] +// GFX10: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x96] s_add_u32 s101, s102, s103 // SICI: s_add_u32 s101, s102, s103 ; encoding: [0x66,0x67,0x65,0x80] // NOGFX89: error: not a valid operand +// GFX10: s_add_u32 s101, s102, s103 ; encoding: [0x66,0x67,0x65,0x80] s_lshl1_add_u32 s5, s1, s2 // GFX9: s_lshl1_add_u32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0x97] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl1_add_u32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0x97] s_lshl1_add_u32 s5, -1, s2 // GFX9: s_lshl1_add_u32 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0x97] -// NOSICIVI: error +// 
NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl1_add_u32 s5, -1, s2 ; encoding: [0xc1,0x02,0x05,0x97] s_lshl1_add_u32 s5, s1, 0 // GFX9: s_lshl1_add_u32 s5, s1, 0 ; encoding: [0x01,0x80,0x05,0x97] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl1_add_u32 s5, s1, 0 ; encoding: [0x01,0x80,0x05,0x97] s_lshl1_add_u32 s5, s1, 0x3f717273 // GFX9: s_lshl1_add_u32 s5, s1, 0x3f717273 ; encoding: [0x01,0xff,0x05,0x97,0x73,0x72,0x71,0x3f] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl1_add_u32 s5, s1, 0x3f717273 ; encoding: [0x01,0xff,0x05,0x97,0x73,0x72,0x71,0x3f] s_lshl2_add_u32 s101, s1, s2 // GFX9: s_lshl2_add_u32 s101, s1, s2 ; encoding: [0x01,0x02,0xe5,0x97] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl2_add_u32 s101, s1, s2 ; encoding: [0x01,0x02,0xe5,0x97] s_lshl2_add_u32 s5, 0xaf123456, s2 // GFX9: s_lshl2_add_u32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x85,0x97,0x56,0x34,0x12,0xaf] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl2_add_u32 s5, 0xaf123456, s2 ; encoding: [0xff,0x02,0x85,0x97,0x56,0x34,0x12,0xaf] s_lshl3_add_u32 s5, 0x3f717273, s2 // GFX9: s_lshl3_add_u32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0x98,0x73,0x72,0x71,0x3f] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl3_add_u32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0x98,0x73,0x72,0x71,0x3f] s_lshl3_add_u32 s5, s1, s101 // GFX9: s_lshl3_add_u32 s5, s1, s101 ; encoding: [0x01,0x65,0x05,0x98] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl3_add_u32 s5, s1, s101 ; encoding: [0x01,0x65,0x05,0x98] s_lshl4_add_u32 s5, s1, 0xaf123456 // GFX9: s_lshl4_add_u32 s5, s1, 0xaf123456 ; encoding: [0x01,0xff,0x85,0x98,0x56,0x34,0x12,0xaf] -// NOSICIVI: error +// NOSICIVI: error: 
instruction not supported on this GPU +// GFX10: s_lshl4_add_u32 s5, s1, 0xaf123456 ; encoding: [0x01,0xff,0x85,0x98,0x56,0x34,0x12,0xaf] s_lshl4_add_u32 s5, -1, s2 // GFX9: s_lshl4_add_u32 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0x98] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_lshl4_add_u32 s5, -1, s2 ; encoding: [0xc1,0x02,0x85,0x98] s_mul_hi_i32 s5, s101, s2 // GFX9: s_mul_hi_i32 s5, s101, s2 ; encoding: [0x65,0x02,0x85,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_i32 s5, s101, s2 ; encoding: [0x65,0x02,0x05,0x9b] s_mul_hi_i32 s5, 0, s2 // GFX9: s_mul_hi_i32 s5, 0, s2 ; encoding: [0x80,0x02,0x85,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_i32 s5, 0, s2 ; encoding: [0x80,0x02,0x05,0x9b] s_mul_hi_i32 s5, 0x3f717273, s2 // GFX9: s_mul_hi_i32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x85,0x96,0x73,0x72,0x71,0x3f] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_i32 s5, 0x3f717273, s2 ; encoding: [0xff,0x02,0x05,0x9b,0x73,0x72,0x71,0x3f] s_mul_hi_i32 s5, s1, s101 // GFX9: s_mul_hi_i32 s5, s1, s101 ; encoding: [0x01,0x65,0x85,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_i32 s5, s1, s101 ; encoding: [0x01,0x65,0x05,0x9b] s_mul_hi_i32 s5, s1, 0 // GFX9: s_mul_hi_i32 s5, s1, 0 ; encoding: [0x01,0x80,0x85,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_i32 s5, s1, 0 ; encoding: [0x01,0x80,0x05,0x9b] s_mul_hi_u32 s5, s1, 0x3f717273 // GFX9: s_mul_hi_u32 s5, s1, 0x3f717273 ; encoding: [0x01,0xff,0x05,0x96,0x73,0x72,0x71,0x3f] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_u32 s5, s1, 0x3f717273 ; encoding: [0x01,0xff,0x85,0x9a,0x73,0x72,0x71,0x3f] s_mul_hi_u32 s5, s1, s101 // GFX9: 
s_mul_hi_u32 s5, s1, s101 ; encoding: [0x01,0x65,0x05,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_u32 s5, s1, s101 ; encoding: [0x01,0x65,0x85,0x9a] s_mul_hi_u32 s5, s1, 0 // GFX9: s_mul_hi_u32 s5, s1, 0 ; encoding: [0x01,0x80,0x05,0x96] -// NOSICIVI: error +// NOSICIVI: error: instruction not supported on this GPU +// GFX10: s_mul_hi_u32 s5, s1, 0 ; encoding: [0x01,0x80,0x85,0x9a] diff --git a/llvm/test/MC/AMDGPU/sopc-err.s b/llvm/test/MC/AMDGPU/sopc-err.s index 88788862f1d75..5f2021a5aaf5d 100644 --- a/llvm/test/MC/AMDGPU/sopc-err.s +++ b/llvm/test/MC/AMDGPU/sopc-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI --implicit-check-not=error: %s s_set_gpr_idx_on s0, s1 // VI: error: expected absolute expression diff --git a/llvm/test/MC/AMDGPU/sopc.s b/llvm/test/MC/AMDGPU/sopc.s index 38b385aa6a360..3ef217798a2ef 100644 --- a/llvm/test/MC/AMDGPU/sopc.s +++ b/llvm/test/MC/AMDGPU/sopc.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SICI %s // RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // SOPC Instructions @@ -76,41 +76,51 @@ s_cmp_lg_u64 s[0:1], s[2:3] gpr_idx = 1 
s_set_gpr_idx_on s0, gpr_idx // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; encoding: [0x00,0x01,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction gpr_idx_mode = 10 s_set_gpr_idx_on s0, gpr_idx_mode + 5 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x00,0x0f,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, 0 // VI: s_set_gpr_idx_on s0, gpr_idx() ; encoding: [0x00,0x00,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, gpr_idx() // VI: s_set_gpr_idx_on s0, gpr_idx() ; encoding: [0x00,0x00,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: unknown token in expression +// GFX10-ERR: error: unknown token in expression s_set_gpr_idx_on s0, 1 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; encoding: [0x00,0x01,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, gpr_idx(SRC0) // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0) ; encoding: [0x00,0x01,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, 3 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1) ; encoding: [0x00,0x03,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, gpr_idx(SRC1,SRC0) // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1) ; encoding: [0x00,0x03,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: expected ')' in parentheses expression +// GFX10-ERR: error: expected ')' in parentheses expression s_set_gpr_idx_on s0, 15 // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x00,0x0f,0x11,0xbf] -// 
NOSICI: error: +// NOSICI: error: invalid operand for instruction +// GFX10-ERR: error: invalid operand for instruction s_set_gpr_idx_on s0, gpr_idx(SRC0,DST,SRC2,SRC1) // VI: s_set_gpr_idx_on s0, gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x00,0x0f,0x11,0xbf] -// NOSICI: error: +// NOSICI: error: expected ')' in parentheses expression +// GFX10-ERR: error: expected ')' in parentheses expression diff --git a/llvm/test/MC/AMDGPU/sopk-err.s b/llvm/test/MC/AMDGPU/sopk-err.s index 7d1bd8110b5d8..2311c72b52b24 100644 --- a/llvm/test/MC/AMDGPU/sopk-err.s +++ b/llvm/test/MC/AMDGPU/sopk-err.s @@ -1,9 +1,14 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GFX9-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck -check-prefixes=SICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefixes=SI,SICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefixes=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s + +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefixes=GCN,SICIVI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=GCN,SICIVI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn 
-mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN,SICIVI-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=GCN,GFX9-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefixes=GCN,GFX10-ERR --implicit-check-not=error: %s s_setreg_b32 0x1f803, s2 // GCN: error: invalid immediate: only 16-bit values are legal @@ -42,61 +47,55 @@ s_getreg_b32 s2, hwreg(3,32,32) // GCN: error: invalid bit offset: only 5-bit values are legal s_cbranch_i_fork s[2:3], 0x6 -// GFX10: error: instruction not supported on this GPU +// SICI: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x82,0xb8] +// GFX10-ERR: error: instruction not supported on this GPU +// GFX9: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x02,0xb8] +// VI: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x02,0xb8] s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) ; encoding: [0x0f,0xf8,0x82,0xb8] // GFX10: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) ; encoding: [0x0f,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) ; encoding: [0x0f,0xf8,0x82,0xb8] s_getreg_b32 s2, hwreg(HW_REG_TBA_LO) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TBA_LO) ; encoding: [0x10,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_TBA_HI) -// SI-ERR: specified hardware register 
is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TBA_HI) ; encoding: [0x11,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_TMA_LO) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TMA_LO) ; encoding: [0x12,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_TMA_HI) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_TMA_HI) ; encoding: [0x13,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_LO) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_LO) ; encoding: [0x14,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_HI) -// SI-ERR: specified hardware register is not supported on this GPU -// 
VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_HI) ; encoding: [0x15,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) ; encoding: [0x16,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_getreg_b32 s2, hwreg(HW_REG_POPS_PACKER) -// SI-ERR: specified hardware register is not supported on this GPU -// VI-ERR: specified hardware register is not supported on this GPU -// GFX9-ERR: specified hardware register is not supported on this GPU // GFX10: s_getreg_b32 s2, hwreg(HW_REG_POPS_PACKER) ; encoding: [0x19,0xf8,0x02,0xb9] +// SICIVI-ERR: error: specified hardware register is not supported on this GPU +// GFX9-ERR: error: specified hardware register is not supported on this GPU s_cmpk_le_u32 s2, -1 // GCN: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/sopk.s b/llvm/test/MC/AMDGPU/sopk.s index ebadd76cee2f3..e128df94c611f 100644 --- a/llvm/test/MC/AMDGPU/sopk.s +++ b/llvm/test/MC/AMDGPU/sopk.s @@ -4,10 +4,10 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=VI9 --check-prefix=GFX9 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=NOSICIVI %s -// RUN: not llvm-mc 
-arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOSI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOSI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=NOSICIVI -check-prefix=NOVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Instructions diff --git a/llvm/test/MC/AMDGPU/sopp-err.s b/llvm/test/MC/AMDGPU/sopp-err.s index 2a78940655fc7..f3181de9438fa 100644 --- a/llvm/test/MC/AMDGPU/sopp-err.s +++ b/llvm/test/MC/AMDGPU/sopp-err.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=SICIVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefix=GCN %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=SICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=SICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=VI --check-prefix=SICIVI --implicit-check-not=error: 
%s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GCN --check-prefix=GFX10 --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // sendmsg @@ -84,15 +84,22 @@ s_sendmsg sendmsg(MSG_GS_DONE, 0, 0) s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) // SICIVI: error: invalid message id +// SICI: error: invalid message id s_sendmsg sendmsg(MSG_GS_ALLOC_REQ, 0) // SICIVI: error: invalid message id +// SICI: error: invalid message id +// GFX10: error: message does not support operations s_sendmsg sendmsg(-1) // SICIVI: error: invalid message id +// SICI: error: invalid message id +// GFX10: error: invalid message id s_sendmsg sendmsg(16) // SICIVI: error: invalid message id +// SICI: error: invalid message id +// GFX10: error: invalid message id s_sendmsg sendmsg(MSG_SYSMSG) // GCN: error: missing message operation @@ -112,6 +119,7 @@ s_sendmsg sendmsg(MSG_SYSMSG, 5) s_waitcnt lgkmcnt(16) // SICIVI: error: too large value for lgkmcnt +// SICI: error: too large value for lgkmcnt s_waitcnt lgkmcnt(64) // GCN: error: too large value for lgkmcnt @@ -121,9 +129,12 @@ s_waitcnt expcnt(8) s_waitcnt vmcnt(16) // SICIVI: error: too large value for vmcnt +// SICI: error: too large value for vmcnt s_waitcnt vmcnt(64) // GFX10: error: too large value for vmcnt +// SICI: error: too large value for vmcnt +// SICIVI: error: too large value for vmcnt s_waitcnt vmcnt(0xFFFFFFFFFFFF0000) // GCN: error: too large value for vmcnt diff --git a/llvm/test/MC/AMDGPU/sopp.s b/llvm/test/MC/AMDGPU/sopp.s index 4be9323741157..63783f61c6bf1 100644 --- a/llvm/test/MC/AMDGPU/sopp.s +++ b/llvm/test/MC/AMDGPU/sopp.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SICI %s -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI 
--implicit-check-not=error: // RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=VI %s //===----------------------------------------------------------------------===// @@ -357,23 +357,23 @@ s_ttracedata s_set_gpr_idx_off // VI: s_set_gpr_idx_off ; encoding: [0x00,0x00,0x9c,0xbf] -// NOSICI: error: +// NOSICI: error: instruction not supported on this GPU s_set_gpr_idx_mode 0 // VI: s_set_gpr_idx_mode gpr_idx() ; encoding: [0x00,0x00,0x9d,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction s_set_gpr_idx_mode gpr_idx() // VI: s_set_gpr_idx_mode gpr_idx() ; encoding: [0x00,0x00,0x9d,0xbf] -// NOSICI: error: +// NOSICI: error: unknown token in expression s_set_gpr_idx_mode 15 // VI: s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x0f,0x00,0x9d,0xbf] -// NOSICI: error: +// NOSICI: error: invalid operand for instruction s_set_gpr_idx_mode gpr_idx(SRC2,SRC1,SRC0,DST) // VI: s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST) ; encoding: [0x0f,0x00,0x9d,0xbf] -// NOSICI: error: +// NOSICI: error: expected ')' in parentheses expression s_endpgm_saved // VI: s_endpgm_saved ; encoding: [0x00,0x00,0x9b,0xbf] diff --git a/llvm/test/MC/AMDGPU/trap.s b/llvm/test/MC/AMDGPU/trap.s index 7b527ba3072e9..5d23c1f30d6ed 100644 --- a/llvm/test/MC/AMDGPU/trap.s +++ b/llvm/test/MC/AMDGPU/trap.s @@ -3,10 +3,10 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=VI // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9 -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s 
--check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOSICIVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: //===----------------------------------------------------------------------===// // Trap Handler related - 32 bit registers diff --git a/llvm/test/MC/AMDGPU/vintrp-err.s b/llvm/test/MC/AMDGPU/vintrp-err.s index 08ab2797ce535..00491e0fe9877 100644 --- a/llvm/test/MC/AMDGPU/vintrp-err.s +++ b/llvm/test/MC/AMDGPU/vintrp-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI --implicit-check-not=error: %s v_interp_p1_f32 v0, v1, attr64.w // GCN: :25: error: out of bounds attr diff --git a/llvm/test/MC/AMDGPU/vop-err.s b/llvm/test/MC/AMDGPU/vop-err.s index 13388263b20e9..c66b5b90e27a4 100644 --- a/llvm/test/MC/AMDGPU/vop-err.s +++ b/llvm/test/MC/AMDGPU/vop-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck --implicit-check-not=error: %s // GENERIC LIMITATIONS ON VOP FORMATS: 
CONSTANT BUS RESTRICTIONS diff --git a/llvm/test/MC/AMDGPU/vop1-gfx9-err.s b/llvm/test/MC/AMDGPU/vop1-gfx9-err.s index 61bf5f6617595..9345632855379 100644 --- a/llvm/test/MC/AMDGPU/vop1-gfx9-err.s +++ b/llvm/test/MC/AMDGPU/vop1-gfx9-err.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN,GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN,VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefixes=GCN,CI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=GCN,GFX9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=GCN,VI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefixes=GCN,CI --implicit-check-not=error: %s v_swap_b32 v1, 1 // GCN: :16: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/vop1-gfx9.s b/llvm/test/MC/AMDGPU/vop1-gfx9.s index 96e328c433bab..9f74e3a71a6c4 100644 --- a/llvm/test/MC/AMDGPU/vop1-gfx9.s +++ b/llvm/test/MC/AMDGPU/vop1-gfx9.s @@ -1,7 +1,7 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s v_swap_b32 v1, v2 
// GFX9: v_swap_b32 v1, v2 ; encoding: [0x02,0xa3,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/vop1.s b/llvm/test/MC/AMDGPU/vop1.s index e9d288418c42a..12a033c92992c 100644 --- a/llvm/test/MC/AMDGPU/vop1.s +++ b/llvm/test/MC/AMDGPU/vop1.s @@ -3,10 +3,10 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=CIVI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: // Force 32-bit encoding diff --git a/llvm/test/MC/AMDGPU/vop2-err.s b/llvm/test/MC/AMDGPU/vop2-err.s index c446f1f01ec15..526483b1f5c32 100644 --- a/llvm/test/MC/AMDGPU/vop2-err.s +++ b/llvm/test/MC/AMDGPU/vop2-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck 
--implicit-check-not=error: %s //===----------------------------------------------------------------------===// // Generic checks diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s index 1505c8cfa44d9..b2893154dd6dd 100644 --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -3,10 +3,10 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: //===----------------------------------------------------------------------===// // Generic Checks for floating-point instructions (These have modifiers). 
diff --git a/llvm/test/MC/AMDGPU/vop3-convert.s b/llvm/test/MC/AMDGPU/vop3-convert.s index 0bdf86cb55862..a654af5e47521 100644 --- a/llvm/test/MC/AMDGPU/vop3-convert.s +++ b/llvm/test/MC/AMDGPU/vop3-convert.s @@ -3,10 +3,10 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: v_mov_b32 [v1], [v2] // GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e] diff --git a/llvm/test/MC/AMDGPU/vop3-errs.s b/llvm/test/MC/AMDGPU/vop3-errs.s index 9fbce05155430..01cbb130f95c6 100644 --- a/llvm/test/MC/AMDGPU/vop3-errs.s +++ b/llvm/test/MC/AMDGPU/vop3-errs.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN -// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX89 
--check-prefix=GCN -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error: v_add_f32_e64 v0, v1 // GCN: error: too few operands for instruction diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s index e11271ab1eedc..c98fc47093f83 100644 --- a/llvm/test/MC/AMDGPU/vop3-gfx9.s +++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s @@ -1,507 +1,648 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOVI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=NOGFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=NOSI,NOSICI,NOGCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefixes=NOCI,NOSICI,NOGCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefixes=NOVI,NOGCN --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=NOGFX9 --implicit-check-not=error: %s 
v_lshl_add_u32 v1, v2, v3, v4 // GFX9: v_lshl_add_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfd,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_add_lshl_u32 v1, v2, v3, v4 // GFX9: v_add_lshl_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfe,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_add3_u32 v1, v2, v3, v4 // GFX9: v_add3_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xff,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_lshl_or_b32 v1, v2, v3, v4 // GFX9: v_lshl_or_b32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0x00,0xd2,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_and_or_b32 v1, v2, v3, v4 // GFX9: v_and_or_b32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0x01,0xd2,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_or3_b32 v1, v2, v3, v4 // GFX9: v_or3_b32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0x02,0xd2,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_pack_b32_f16 v1, v2, v3 // GFX9: v_pack_b32_f16 v1, v2, v3 ; encoding: [0x01,0x00,0xa0,0xd2,0x02,0x07,0x02,0x00] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] // GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0xa0,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. 
v_pack_b32_f16 v5, v1, v2 op_sel:[0,1,0] // GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[0,1,0] ; encoding: [0x05,0x10,0xa0,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_pack_b32_f16 v5, v1, v2 op_sel:[0,0,1] // GFX9: v_pack_b32_f16 v5, v1, v2 op_sel:[0,0,1] ; encoding: [0x05,0x40,0xa0,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_xad_u32 v1, v2, v3, v4 // GFX9: v_xad_u32 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf3,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_min3_f16 v1, v2, v3, v4 // GFX9: v_min3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf4,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_min3_i16 v1, v2, v3, v4 // GFX9: v_min3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf5,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_min3_u16 v1, v2, v3, v4 // GFX9: v_min3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf6,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_max3_f16 v1, v2, v3, v4 // GFX9: v_max3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf7,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,0] // GFX9: v_max3_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. 
v_max3_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_i16 v1, v2, v3, v4 // GFX9: v_max3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf8,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,0] // GFX9: v_max3_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_i16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_i16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. 
v_max3_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_max3_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf8,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_max3_u16 v1, v2, v3, v4 // GFX9: v_max3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xf9,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_med3_f16 v1, v2, v3, v4 // GFX9: v_med3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfa,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_med3_i16 v1, v2, v3, v4 // GFX9: v_med3_i16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfb,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_med3_u16 v1, v2, v3, v4 // GFX9: v_med3_u16 v1, v2, v3, v4 ; encoding: [0x01,0x00,0xfc,0xd1,0x02,0x07,0x12,0x04] -// NOVI: :1: error: instruction not supported on this GPU +// NOGCN: :1: error: instruction not supported on this GPU v_mad_u32_u16 v5, v1, v2, v3 // GFX9: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. 
v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_u32_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0xf1,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_mad_i32_i16 v5, v1, v2, v3 // GFX9: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xf2,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_i32_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_i32_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0xf2,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, v1, v2 // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: instruction not supported on this GPU v_cvt_pknorm_i16_f16 v5, -v1, v2 // GFX9: v_cvt_pknorm_i16_f16 v5, -v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x20] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, v1, -v2 // GFX9: v_cvt_pknorm_i16_f16 v5, v1, -v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x40] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, -v1, -v2 // GFX9: v_cvt_pknorm_i16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x60] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, |v1|, v2 // GFX9: v_cvt_pknorm_i16_f16 v5, |v1|, v2 ; encoding: [0x05,0x01,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, v1, |v2| // GFX9: v_cvt_pknorm_i16_f16 v5, v1, |v2| ; encoding: [0x05,0x02,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[0,0,0] // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. 
v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,0,0] // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_cvt_pknorm_i16_f16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x99,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_cvt_pknorm_u16_f16 v5, -v1, -v2 // GFX9: v_cvt_pknorm_u16_f16 v5, -v1, -v2 ; encoding: [0x05,0x00,0x9a,0xd2,0x01,0x05,0x02,0x60] +// NOGCN: error: not a valid operand. v_cvt_pknorm_u16_f16 v5, |v1|, |v2| // GFX9: v_cvt_pknorm_u16_f16 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x9a,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_cvt_pknorm_u16_f16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_cvt_pknorm_u16_f16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9a,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_add_i16 v5, v1, v2 // GFX9: v_add_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x9e,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: instruction not supported on this GPU v_add_i16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_add_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9e,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. v_sub_i16 v5, v1, v2 // GFX9: v_sub_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x9f,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: instruction not supported on this GPU v_sub_i16 v5, v1, v2 op_sel:[1,1,1] // GFX9: v_sub_i16 v5, v1, v2 op_sel:[1,1,1] ; encoding: [0x05,0x58,0x9f,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: not a valid operand. 
v_sub_i16 v5, v1, v2 clamp // GFX9: v_sub_i16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x9f,0xd2,0x01,0x05,0x02,0x00] +// NOGCN: error: invalid operand for instruction v_fma_f16_e64 v5, v1, v2, v3 // GFX9: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_fma_f16 v5, v1, -v2, v3 // GFX9: v_fma_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x44] +// NOSICI: error: not a valid operand. v_fma_f16 v5, v1, v2, |v3| // GFX9: v_fma_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. v_fma_f16 v5, v1, v2, v3 clamp // GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x06,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_fma_legacy_f16_e64 v5, v1, v2, v3 // GFX9: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_fma_legacy_f16 v5, -v1, v2, v3 // GFX9: v_fma_legacy_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x24] +// NOGCN: error: not a valid operand. 
v_fma_legacy_f16 v5, v1, |v2|, v3 // GFX9: v_fma_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_fma_legacy_f16 v5, v1, v2, v3 clamp // GFX9: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_div_fixup_f16_e64 v5, 0.5, v2, v3 // GFX9: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, 0.5, v3 // GFX9: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, 0.5 // GFX9: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, -v1, v2, v3 // GFX9: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0x24] +// NOSICI: error: not a valid operand. v_div_fixup_f16 v5, |v1|, v2, v3 // GFX9: v_div_fixup_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0x07,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. v_div_fixup_f16 v5, v1, v2, v3 clamp // GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x07,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x07,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. 
+// NOVI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x07,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_div_fixup_legacy_f16_e64 v5, 0.5, v2, v3 // GFX9: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_div_fixup_legacy_f16 v5, v1, 0.5, v3 // GFX9: v_div_fixup_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] +// NOGCN: error: instruction not supported on this GPU v_div_fixup_legacy_f16 v5, v1, v2, 0.5 // GFX9: v_div_fixup_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] +// NOGCN: error: instruction not supported on this GPU v_div_fixup_legacy_f16 v5, -v1, v2, v3 // GFX9: v_div_fixup_legacy_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24] +// NOGCN: error: not a valid operand. v_div_fixup_legacy_f16 v5, v1, |v2|, v3 // GFX9: v_div_fixup_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xef,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. 
v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp // GFX9: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_mad_f16_e64 v5, 0.5, v2, v3 // GFX9: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, 0.5, v3 // GFX9: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, 0.5 // GFX9: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, -v3 // GFX9: v_mad_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x84] +// NOSICI: error: not a valid operand. v_mad_f16 v5, v1, v2, |v3| // GFX9: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. 
+// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, v3 clamp // GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_mad_i16_e64 v5, 0, v2, v3 // GFX9: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_i16 v5, v1, -1, v3 // GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_i16 v5, v1, v2, -4.0 -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: invalid literal operand v_mad_i16 v5, v1, v2, v3 clamp // GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x05,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x05,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. 
+// NOVI: error: instruction not supported on this GPU v_mad_legacy_f16_e64 v5, 0.5, v2, v3 // GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_f16 v5, v1, 0.5, v3 // GFX9: v_mad_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_f16 v5, v1, v2, 0.5 // GFX9: v_mad_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_f16 v5, v1, -v2, v3 // GFX9: v_mad_legacy_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x44] +// NOGCN: error: not a valid operand. v_mad_legacy_f16 v5, v1, |v2|, v3 // GFX9: v_mad_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xea,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: not a valid operand. v_mad_legacy_f16 v5, v1, v2, v3 clamp // GFX9: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_mad_legacy_i16_e64 v5, 0, v2, v3 // GFX9: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_i16 v5, v1, -1, v3 // GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_i16 v5, v1, v2, -4.0 -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_i16 v5, v1, v2, -4.0 clamp -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOGCN: error: invalid operand for instruction v_mad_legacy_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] +// NOGCN: error: 
instruction not supported on this GPU v_mad_legacy_u16 v5, v1, -1, v3 // GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04] +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_u16 v5, v1, v2, -4.0 -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOGCN: error: instruction not supported on this GPU v_mad_legacy_u16 v5, v1, v2, -4.0 clamp -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOGCN: error: invalid operand for instruction v_mad_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_u16 v5, v1, -1, v3 // GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, -4.0 -// NOGFX9: invalid literal operand +// NOGFX9: error: invalid literal operand +// NOSICI: error: instruction not supported on this GPU +// NOVI: error: invalid literal operand v_mad_u16 v5, v1, v2, v3 clamp // GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x04,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x04,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. +// NOVI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] // GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x04,0xd2,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. 
+// NOVI: error: instruction not supported on this GPU v_interp_p2_f16 v5, v2, attr0.x, v3 // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_interp_p2_f16 v5, -v2, attr0.x, v3 // GFX9: v_interp_p2_f16 v5, -v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x44] +// NOSICI: error: not a valid operand. v_interp_p2_f16 v5, v2, attr0.x, |v3| // GFX9: v_interp_p2_f16 v5, v2, attr0.x, |v3| ; encoding: [0x05,0x04,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: error: not a valid operand. v_interp_p2_f16 v5, v2, attr0.w, v3 // GFX9: v_interp_p2_f16 v5, v2, attr0.w, v3 ; encoding: [0x05,0x00,0x77,0xd2,0xc0,0x04,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_interp_p2_f16 v5, v2, attr0.x, v3 high // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 high ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_interp_p2_f16 v5, v2, attr0.x, v3 clamp // GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 // GFX9: v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x1f,0x04,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_interp_p2_legacy_f16 v5, -v2, attr0.x, v3 // GFX9: v_interp_p2_legacy_f16 v5, -v2, attr0.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x04,0x0e,0x44] +// NOGCN: error: not a valid operand. v_interp_p2_legacy_f16 v5, v2, attr0.x, |v3| // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, |v3| ; encoding: [0x05,0x04,0x76,0xd2,0x00,0x04,0x0e,0x04] +// NOGCN: error: not a valid operand. 
v_interp_p2_legacy_f16 v5, v2, attr0.w, v3 // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.w, v3 ; encoding: [0x05,0x00,0x76,0xd2,0xc0,0x04,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 high // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 high ; encoding: [0x05,0x00,0x76,0xd2,0x00,0x05,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 clamp // GFX9: v_interp_p2_legacy_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04] +// NOGCN: error: invalid operand for instruction v_cvt_norm_i16_f16_e64 v5, -v1 // GFX9: v_cvt_norm_i16_f16_e64 v5, -v1 ; encoding: [0x05,0x00,0x8d,0xd1,0x01,0x01,0x00,0x20] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: not a valid operand. v_cvt_norm_i16_f16_e64 v5, |v1| // GFX9: v_cvt_norm_i16_f16_e64 v5, |v1| ; encoding: [0x05,0x01,0x8d,0xd1,0x01,0x01,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: not a valid operand. v_cvt_norm_u16_f16_e64 v5, -v1 // GFX9: v_cvt_norm_u16_f16_e64 v5, -v1 ; encoding: [0x05,0x00,0x8e,0xd1,0x01,0x01,0x00,0x20] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: not a valid operand. v_cvt_norm_u16_f16_e64 v5, |v1| // GFX9: v_cvt_norm_u16_f16_e64 v5, |v1| ; encoding: [0x05,0x01,0x8e,0xd1,0x01,0x01,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: not a valid operand. 
v_sat_pk_u8_i16_e64 v5, -1 // GFX9: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0x8f,0xd1,0xc1,0x00,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_sat_pk_u8_i16_e64 v5, v255 // GFX9: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0x8f,0xd1,0xff,0x01,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_screen_partition_4se_b32_e64 v5, v1 // GXF9: [0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU +// GFX9: v_screen_partition_4se_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x77,0xd1,0x01,0x01,0x00,0x00] v_screen_partition_4se_b32_e64 v5, -1 // GXF9: [0x05,0x00,0x77,0xd1,0xc1,0x00,0x00,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU +// GFX9: v_screen_partition_4se_b32_e64 v5, -1 ; encoding: [0x05,0x00,0x77,0xd1,0xc1,0x00,0x00,0x00] v_add_u32 v84, v13, s31 clamp // GFX9: v_add_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x34,0xd1,0x0d,0x3f,0x00,0x00] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_sub_u32 v84, v13, s31 clamp // GFX9: v_sub_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x35,0xd1,0x0d,0x3f,0x00,0x00] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_subrev_u32 v84, v13, s31 clamp // GFX9: v_subrev_u32_e64 v84, v13, s31 clamp ; encoding: [0x54,0x80,0x36,0xd1,0x0d,0x3f,0x00,0x00] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_addc_co_u32 v84, s[4:5], v13, v31, vcc clamp // GFX9: v_addc_co_u32_e64 v84, s[4:5], v13, v31, vcc clamp ; encoding: [0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_subb_co_u32 v84, s[2:3], v13, v31, vcc clamp // GFX9: v_subb_co_u32_e64 v84, s[2:3], v13, v31, vcc clamp ; encoding: 
[0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_subbrev_co_u32 v84, vcc, v13, v31, s[6:7] clamp // GFX9: v_subbrev_co_u32_e64 v84, vcc, v13, v31, s[6:7] clamp ; encoding: [0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00] -// NOVI: error: +// NOGCN: error: invalid operand for instruction v_add_co_u32 v84, s[4:5], v13, v31 clamp // GFX9: v_add_co_u32_e64 v84, s[4:5], v13, v31 clamp ; encoding: [0x54,0x84,0x19,0xd1,0x0d,0x3f,0x02,0x00] -// NOVI: error: +// NOSICI: error: integer clamping is not supported on this GPU +// NOVI: error: invalid operand for instruction v_sub_co_u32 v84, s[2:3], v13, v31 clamp // GFX9: v_sub_co_u32_e64 v84, s[2:3], v13, v31 clamp ; encoding: [0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x02,0x00] -// NOVI: error: +// NOSICI: error: integer clamping is not supported on this GPU +// NOVI: error: invalid operand for instruction v_subrev_co_u32 v84, vcc, v13, v31 clamp // GFX9: v_subrev_co_u32_e64 v84, vcc, v13, v31 clamp ; encoding: [0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x02,0x00] -// NOVI: error: +// NOSICI: error: integer clamping is not supported on this GPU +// NOVI: error: invalid operand for instruction v_addc_co_u32 v84, vcc, v13, v31, vcc // GFX9: v_addc_co_u32_e32 v84, vcc, v13, v31, vcc ; encoding: [0x0d,0x3f,0xa8,0x38] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_subb_co_u32 v84, vcc, v13, v31, vcc // GFX9: v_subb_co_u32_e32 v84, vcc, v13, v31, vcc ; encoding: [0x0d,0x3f,0xa8,0x3a] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_subbrev_co_u32 v84, vcc, v13, v31, vcc // GFX9: v_subbrev_co_u32_e32 v84, vcc, v13, v31, vcc ; encoding: [0x0d,0x3f,0xa8,0x3c] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_add_co_u32 v84, vcc, v13, v31 // GFX9: v_add_co_u32_e32 v84, vcc, v13, v31 ; encoding: 
[0x0d,0x3f,0xa8,0x32] @@ -517,97 +658,97 @@ v_subrev_co_u32 v84, vcc, v13, v31 v_add_i32 v1, v2, v3 // GFX9: v_add_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x9c,0xd2,0x02,0x07,0x02,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_add_i32 v1, v2, v3 clamp // GFX9: v_add_i32 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x9c,0xd2,0x02,0x07,0x02,0x00] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction v_sub_i32 v1, v2, v3 // GFX9: v_sub_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x9d,0xd2,0x02,0x07,0x02,0x00] -// NOVI: error: instruction not supported on this GPU +// NOGCN: error: instruction not supported on this GPU v_sub_i32 v1, v2, v3 clamp // GFX9: v_sub_i32 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x9d,0xd2,0x02,0x07,0x02,0x00] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction //===----------------------------------------------------------------------===// // Validate register size checks (bug 37943) //===----------------------------------------------------------------------===// -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], s0, v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], s[0:3], v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], v0, v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], v[0:2], v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid 
operand for instruction v_add_f64 v[0:1], v[0:3], v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], v[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f64 v[0:1], v[0:1], s0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f32 v0, s[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f32 v0, v[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f32 v0, v0, s[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f32 v0, v0, v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f16 v0, s[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f16 v0, v[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f16 v0, v0, s[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_f16 v0, v0, v[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_u16 v0, s[0:1], v0 -// NOVI: error: 
invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_u16 v0, v[0:1], v0 -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_u16 v0, v0, s[0:1] -// NOVI: error: invalid operand for instruction +// NOGCN: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_add_u16 v0, v0, v[0:1] diff --git a/llvm/test/MC/AMDGPU/vop3-literal.s b/llvm/test/MC/AMDGPU/vop3-literal.s index 99265352f0cec..43223108163a0 100644 --- a/llvm/test/MC/AMDGPU/vop3-literal.s +++ b/llvm/test/MC/AMDGPU/vop3-literal.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9-ERR --implicit-check-not=error: %s v_bfe_u32 v0, 0x3039, v1, s1 // GFX10: v_bfe_u32 v0, 0x3039, v1, s1 ; encoding: [0x00,0x00,0x48,0xd5,0xff,0x02,0x06,0x00,0x39,0x30,0x00,0x00] @@ -44,12 +44,12 @@ v_bfe_u32 v0, 0x3039, 0x12345, v2 // GFX9-ERR: error: invalid literal 
operand v_bfe_u32 v0, s1, 0x3039, s1 -// GFX10-ERR: v_bfe_u32 v0, s1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00] // GFX9-ERR: error: invalid literal operand +// GFX10: v_bfe_u32 v0, s1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00] v_bfe_u32 v0, s1, 0x3039, s2 -// GFX10: error: invalid operand (violates constant bus restrictions) // GFX9-ERR: error: invalid literal operand +// GFX10-ERR: error: invalid operand (violates constant bus restrictions) v_bfm_b32_e64 v0, 0x3039, s1 // GFX10: v_bfm_b32_e64 v0, 0x3039, s1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00] @@ -197,12 +197,15 @@ v_min3_i16 v5, 0x5678, 0x5678, 0x5679 v_add_nc_u16 v5, 0xfe0b, v2 // GFX10: v_add_nc_u16_e64 v5, 0xfe0b, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_add_nc_u16 v5, v1, 0x1234 // GFX10: v_add_nc_u16_e64 v5, v1, 0x1234 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x34,0x12,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_add_nc_u16 v5, 0x1234, 0x1234 // GFX10: v_add_nc_u16_e64 v5, 0x1234, 0x1234 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfe,0x01,0x00,0x34,0x12,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_ashrrev_i16_e64 v5, 0x3456, v2 // GFX10: v_ashrrev_i16_e64 v5, 0x3456, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00] @@ -254,6 +257,7 @@ v_cmp_f_i32_e64 s[10:11], 0xaf123456, 0xaf123456 v_cmp_f_i32_e64 s[10:11], 0xaf123456, 0xaf123455 // GFX10-ERR: error: invalid literal operand +// GFX9-ERR: error: invalid literal operand v_cmp_f_u64_e64 s[10:11], 0xaf123456, v[2:3] // GFX10: v_cmp_f_u64_e64 s[10:11], 0xaf123456, v[2:3] ; encoding: [0x0a,0x00,0xe0,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] @@ -269,33 +273,43 @@ v_cmp_f_u64_e64 s[10:11], 0x3f717273, 0x3f717273 v_cmpx_class_f32_e64 0xaf123456, v2 // GFX10: 
v_cmpx_class_f32_e64 0xaf123456, v2 ; encoding: [0x00,0x00,0x98,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_class_f32_e64 v1, 0xaf123456 // GFX10: v_cmpx_class_f32_e64 v1, 0xaf123456 ; encoding: [0x00,0x00,0x98,0xd4,0x01,0xff,0x01,0x00,0x56,0x34,0x12,0xaf] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_class_f32_e64 0xaf123456, 0xaf123456 // GFX10: v_cmpx_class_f32_e64 0xaf123456, 0xaf123456 ; encoding: [0x00,0x00,0x98,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_class_f32_e64 0xaf123456, 0xaf123455 // GFX10-ERR: error: invalid literal operand +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_lt_i16_e64 v1, 0x3456 // GFX10: v_cmpx_lt_i16_e64 v1, 0x3456 ; encoding: [0x00,0x00,0x99,0xd4,0x01,0xff,0x01,0x00,0x56,0x34,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_lt_i16_e64 0x3456, v2 // GFX10: v_cmpx_lt_i16_e64 0x3456, v2 ; encoding: [0x00,0x00,0x99,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_lt_i16_e64 0x3456, 0x3456 // GFX10: v_cmpx_lt_i16_e64 0x3456, 0x3456 ; encoding: [0x00,0x00,0x99,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_f_i64_e64 0xaf123456, v[2:3] // GFX10: v_cmpx_f_i64_e64 0xaf123456, v[2:3] ; encoding: [0x00,0x00,0xb0,0xd4,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_f_i64_e64 v[1:2], 0x3f717273 // GFX10: v_cmpx_f_i64_e64 v[1:2], 0x3f717273 ; encoding: [0x00,0x00,0xb0,0xd4,0x01,0xff,0x01,0x00,0x73,0x72,0x71,0x3f] +// GFX9-ERR: error: instruction not supported on this GPU v_cmpx_f_i64_e64 0x3f717273, 0x3f717273 // GFX10: v_cmpx_f_i64_e64 0x3f717273, 0x3f717273 ; encoding: [0x00,0x00,0xb0,0xd4,0xff,0xfe,0x01,0x00,0x73,0x72,0x71,0x3f] +// GFX9-ERR: error: 
instruction not supported on this GPU v_lshlrev_b64 v[5:6], 0xaf123456, v[2:3] // GFX10: v_lshlrev_b64 v[5:6], 0xaf123456, v[2:3] ; encoding: [0x05,0x00,0xff,0xd6,0xff,0x04,0x02,0x00,0x56,0x34,0x12,0xaf] @@ -307,18 +321,23 @@ v_lshlrev_b64 v[5:6], v1, 0x3f717273 v_fma_mix_f32 v5, 0x123, v2, v3 // GFX10: v_fma_mix_f32 v5, 0x123, v2, v3 ; encoding: [0x05,0x00,0x20,0xcc,0xff,0x04,0x0e,0x04,0x23,0x01,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v5, v1, 0x7b, v3 // GFX10: v_fma_mix_f32 v5, v1, 0x7b, v3 ; encoding: [0x05,0x00,0x20,0xcc,0x01,0xff,0x0d,0x04,0x7b,0x00,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v5, v1, v2, 0x1c8 // GFX10: v_fma_mix_f32 v5, v1, v2, 0x1c8 ; encoding: [0x05,0x00,0x20,0xcc,0x01,0x05,0xfe,0x03,0xc8,0x01,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v5, 0x1c8a, v2, 0x1c8a // GFX10: v_fma_mix_f32 v5, 0x1c8a, v2, 0x1c8a ; encoding: [0x05,0x00,0x20,0xcc,0xff,0x04,0xfe,0x03,0x8a,0x1c,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_fma_mix_f32 v5, 0x1c8a, 0x1c8a, 0x1c8a // GFX10: v_fma_mix_f32 v5, 0x1c8a, 0x1c8a, 0x1c8a ; encoding: [0x05,0x00,0x20,0xcc,0xff,0xfe,0xfd,0x03,0x8a,0x1c,0x00,0x00] +// GFX9-ERR: error: instruction not supported on this GPU v_pk_add_f16 v5, 0xaf123456, v2 // GFX10: v_pk_add_f16 v5, 0xaf123456, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/vop3-modifiers-err.s b/llvm/test/MC/AMDGPU/vop3-modifiers-err.s index b28768c1ca09f..95811c789e844 100644 --- a/llvm/test/MC/AMDGPU/vop3-modifiers-err.s +++ b/llvm/test/MC/AMDGPU/vop3-modifiers-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck --implicit-check-not=error: %s //---------------------------------------------------------------------------// // VOP3 Modifiers 
diff --git a/llvm/test/MC/AMDGPU/vop3.s b/llvm/test/MC/AMDGPU/vop3.s index 2e90817677404..e5ff3f030a6fc 100644 --- a/llvm/test/MC/AMDGPU/vop3.s +++ b/llvm/test/MC/AMDGPU/vop3.s @@ -1,14 +1,14 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI --check-prefix=SICI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI // Make sure interp instructions disassemble regardless of lds bank count // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI - +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck %s -check-prefix=NOCI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s //===----------------------------------------------------------------------===// // VOPC Instructions @@ -287,39 +287,42 @@ v_mac_f32_e64 v0, -v1, |v2| // VI: v_mac_f32_e64 v0, -v1, |v2| ; encoding: [0x00,0x02,0x16,0xd1,0x01,0x05,0x02,0x20] v_mac_f16_e64 v0, 0.5, flat_scratch_lo -// NOSICI: error: // VI: v_mac_f16_e64 v0, 0.5, flat_scratch_lo ; encoding: [0x00,0x00,0x23,0xd1,0xf0,0xcc,0x00,0x00] +// NOCI: 
error: instruction not supported on this GPU +// NOSI: error: not a valid operand. v_mac_f16_e64 v0, -4.0, flat_scratch_lo -// NOSICI: error: // VI: v_mac_f16_e64 v0, -4.0, flat_scratch_lo ; encoding: [0x00,0x00,0x23,0xd1,0xf7,0xcc,0x00,0x00] +// NOCI: error: instruction not supported on this GPU +// NOSI: error: not a valid operand. v_mac_f16_e64 v0, flat_scratch_lo, -4.0 -// NOSICI: error: // VI: v_mac_f16_e64 v0, flat_scratch_lo, -4.0 ; encoding: [0x00,0x00,0x23,0xd1,0x66,0xee,0x01,0x00] +// NOCI: error: instruction not supported on this GPU +// NOSI: error: not a valid operand. v_add_u32 v84, vcc, v13, s31 clamp -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_add_u32_e64 v84, vcc, v13, s31 clamp ; encoding: [0x54,0xea,0x19,0xd1,0x0d,0x3f,0x00,0x00] v_sub_u32 v84, s[2:3], v13, s31 clamp -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_sub_u32_e64 v84, s[2:3], v13, s31 clamp ; encoding: [0x54,0x82,0x1a,0xd1,0x0d,0x3f,0x00,0x00] v_subrev_u32 v84, vcc, v13, s31 clamp -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_subrev_u32_e64 v84, vcc, v13, s31 clamp ; encoding: [0x54,0xea,0x1b,0xd1,0x0d,0x3f,0x00,0x00] v_addc_u32 v84, s[4:5], v13, v31, vcc clamp -// NOSICI: error: +// NOSICI: error: integer clamping is not supported on this GPU // VI: v_addc_u32_e64 v84, s[4:5], v13, v31, vcc clamp ; encoding: [0x54,0x84,0x1c,0xd1,0x0d,0x3f,0xaa,0x01] v_subb_u32 v84, s[2:3], v13, v31, vcc clamp -// NOSICI: error: +// NOSICI: error: integer clamping is not supported on this GPU // VI: v_subb_u32_e64 v84, s[2:3], v13, v31, vcc clamp ; encoding: [0x54,0x82,0x1d,0xd1,0x0d,0x3f,0xaa,0x01] v_subbrev_u32 v84, vcc, v13, v31, s[6:7] clamp -// NOSICI: error: +// NOSICI: error: integer clamping is not supported on this GPU // VI: v_subbrev_u32_e64 v84, vcc, v13, v31, s[6:7] clamp ; encoding: [0x54,0xea,0x1e,0xd1,0x0d,0x3f,0x1a,0x00] 
///===---------------------------------------------------------------------===// @@ -493,81 +496,107 @@ v_cubeid_f32 v0, |-1|, |-1.0|, |1.0| v_fma_f16_e64 v5, v1, v2, v3 // VI: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_fma_f16 v5, v1, v2, 0.5 // VI: v_fma_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0xc2,0x03] +// NOSICI: error: instruction not supported on this GPU v_fma_f16 v5, -v1, -v2, -v3 // VI: v_fma_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0xe4] +// NOSICI: error: not a valid operand. v_fma_f16 v5, |v1|, |v2|, |v3| // VI: v_fma_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. v_fma_f16 v5, v1, v2, v3 clamp // VI: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_div_fixup_f16_e64 v5, v1, v2, v3 // VI: v_div_fixup_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, 0.5, v2, v3 // VI: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, 0.5, v3 // VI: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, 0.5 // VI: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, v1, v2, -4.0 // VI: v_div_fixup_f16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xde,0x03] +// NOSICI: error: instruction not supported on this GPU v_div_fixup_f16 v5, -v1, v2, v3 // VI: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: 
[0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24] +// NOSICI: error: not a valid operand. v_div_fixup_f16 v5, v1, |v2|, v3 // VI: v_div_fixup_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xef,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. v_div_fixup_f16 v5, v1, v2, v3 clamp // VI: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_mad_f16_e64 v5, v1, v2, v3 // VI: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, 0.5, v2, v3 // VI: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, 0.5, v3 // VI: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, v2, 0.5 // VI: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] +// NOSICI: error: instruction not supported on this GPU v_mad_f16 v5, v1, -v2, v3 // VI: v_mad_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x44] +// NOSICI: error: not a valid operand. v_mad_f16 v5, v1, v2, |v3| // VI: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0xea,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: not a valid operand. 
v_mad_f16 v5, v1, v2, v3 clamp // VI: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] +// NOSICI: error: invalid operand for instruction v_mad_i16_e64 v5, -1, v2, v3 // VI: v_mad_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0xc1,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_i16 v5, v1, -4.0, v3 // NOVI: error: invalid literal operand +// NOSICI: error: instruction not supported on this GPU v_mad_i16 v5, v1, v2, 0 // VI: v_mad_i16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0x02,0x02] +// NOSICI: error: instruction not supported on this GPU v_mad_u16_e64 v5, -1, v2, v3 // VI: v_mad_u16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0xc1,0x04,0x0e,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_u16 v5, v1, 0, v3 // VI: v_mad_u16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x01,0x0d,0x04] +// NOSICI: error: instruction not supported on this GPU v_mad_u16 v5, v1, v2, -4.0 // NOVI: error: invalid literal operand +// NOSICI: error: instruction not supported on this GPU ///===---------------------------------------------------------------------===// // VOP3 with Integer Clamp @@ -606,19 +635,21 @@ v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp // VI: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe6,0xd1,0x01,0x05,0x0e,0x04] v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp -// NOSICI: error: // VI: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe5,0xd1,0x01,0x05,0x0e,0x04] +// NOCI: error: integer clamping is not supported on this GPU +// NOSI: error: invalid operand for instruction v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp -// NOSICI: error: // VI: v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp ; encoding: [0xfc,0x80,0xe7,0xd1,0x01,0x05,0x0e,0x04] +// NOCI: error: integer clamping is not supported on this GPU +// NOSI: error: invalid operand for instruction v_mad_u16 v5, v1, 
v2, v3 clamp -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0x0e,0x04] v_mad_i16 v5, v1, v2, v3 clamp -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04] // diff --git a/llvm/test/MC/AMDGPU/vop3p-err.s b/llvm/test/MC/AMDGPU/vop3p-err.s index 9dfd28a4b9f94..614a348ae133f 100644 --- a/llvm/test/MC/AMDGPU/vop3p-err.s +++ b/llvm/test/MC/AMDGPU/vop3p-err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX9 --implicit-check-not=error: %s // GFX9: 25: error: invalid operand for instruction v_pk_add_u16 v1, v2, v3 op_sel @@ -15,7 +15,7 @@ v_pk_add_u16 v1, v2, v3 op_sel:[] // GFX9: 33: error: unknown token in expression v_pk_add_u16 v1, v2, v3 op_sel:[,] -// XXGFX9: 34: error: failed parsing operand. +// FIXME: Should trigger an error. // v_pk_add_u16 v1, v2, v3 op_sel:[0] // GFX9: 35: error: expected a comma @@ -51,14 +51,14 @@ v_pk_add_u16 v1, v2, v3 op_sel:[0,-1] // GFX9: 40: error: expected a closing square bracket v_pk_add_u16 v1, v2, v3 op_sel:[0,0,0,0,0] -// XXGFX9: invalid operand for instruction +// FIXME: should trigger an error v_pk_add_u16 v1, v2, v3 neg_lo:[0,0] // // Regular modifiers on packed instructions // -// FIXME: should be invalid operand for instruction +// FIXME: should be "invalid operand for instruction" // GFX9: :18: error: not a valid operand. 
v_pk_add_f16 v1, |v2|, v3 @@ -87,5 +87,5 @@ v_pk_add_u16 v1, -v2, v3 // Constant bus restrictions // -// GFX9: invalid operand (violates constant bus restrictions) +// GFX9: error: invalid operand (violates constant bus restrictions) v_pk_add_f16 v255, s1, s2 diff --git a/llvm/test/MC/AMDGPU/vop_dpp.s b/llvm/test/MC/AMDGPU/vop_dpp.s index c059b80fd6de9..e0dfc255a89a1 100644 --- a/llvm/test/MC/AMDGPU/vop_dpp.s +++ b/llvm/test/MC/AMDGPU/vop_dpp.s @@ -1,61 +1,61 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=VI9 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=VI9 -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: 
//===----------------------------------------------------------------------===// // Check dpp_ctrl values //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x58,0x00,0xff] v_mov_b32 v0, v0 quad_perm:[0,2,1,1] -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x01,0xff] v_mov_b32 v0, v0 row_shl:1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x1f,0x01,0xff] v_mov_b32 v0, v0 row_shr:0xf -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 row_ror:12 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x2c,0x01,0xff] v_mov_b32 v0, v0 row_ror:0xc -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 wave_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x30,0x01,0xff] v_mov_b32 v0, v0 wave_shl:1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 wave_rol:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x34,0x01,0xff] v_mov_b32 v0, v0 wave_rol:1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 wave_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x38,0x01,0xff] v_mov_b32 v0, v0 wave_shr:1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_mov_b32_dpp v0, v0 wave_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x3c,0x01,0xff] v_mov_b32 v0, v0 wave_ror:1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI9: v_mov_b32_dpp v0, v0 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x40,0x01,0xff] v_mov_b32 v0, v0 row_mirror -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI9: v_mov_b32_dpp v0, v0 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x41,0x01,0xff] v_mov_b32 v0, v0 row_half_mirror -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 row_bcast:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x42,0x01,0xff] v_mov_b32 v0, v0 row_bcast:15 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 row_bcast:31 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x43,0x01,0xff] v_mov_b32 v0, v0 row_bcast:31 @@ -63,31 +63,31 @@ v_mov_b32 v0, v0 row_bcast:31 // Check optional fields //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xa1] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xaf] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xf1] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xff] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xa1] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xaf] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xf1] v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1 bound_ctrl:0 @@ -95,19 +95,19 @@ v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1 bound_ctrl:0 // Check modifiers //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_add_f32_dpp v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x19,0xa1] v_add_f32 v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_add_f32_dpp v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x89,0xa1] v_add_f32 v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_add_f32_dpp v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x99,0xa1] v_add_f32 v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_add_f32_dpp v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x69,0xa1] v_add_f32 v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 @@ -115,242 +115,244 @@ v_add_f32 v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // Check VOP1 opcodes //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. +// NOVI: error: not a valid operand. v_nop row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_u32_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0e,0x00,0x7e,0x00,0x01,0x09,0xa1] v_cvt_u32_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_fract_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x36,0x00,0x7e,0x00,0x01,0x09,0xa1] v_fract_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_sin_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x52,0x00,0x7e,0x00,0x01,0x09,0xa1] v_sin_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mov_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x02,0x7e,0x00,0x01,0x09,0xa1] v_mov_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_cvt_f32_i32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_i32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_u32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_u32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x10,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f16_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x14,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f16_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x16,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_rpi_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x18,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_rpi_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_flr_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x1a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_flr_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_cvt_off_f32_i4_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x1c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_off_f32_i4 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_ubyte0_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x22,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_ubyte0 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_ubyte1_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x24,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_ubyte1 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_ubyte2_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x26,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_ubyte2 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f32_ubyte3_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x28,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f32_ubyte3 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_trunc_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x38,0x02,0x7e,0x00,0x01,0x09,0xa1] v_trunc_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ceil_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ceil_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_rndne_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rndne_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_floor_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_floor_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_exp_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x40,0x02,0x7e,0x00,0x01,0x09,0xa1] v_exp_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_log_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x42,0x02,0x7e,0x00,0x01,0x09,0xa1] v_log_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rcp_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x44,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rcp_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rcp_iflag_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x46,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rcp_iflag_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rsq_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x48,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rsq_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_sqrt_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x4e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_sqrt_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cos_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x54,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cos_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_not_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x56,0x02,0x7e,0x00,0x01,0x09,0xa1] v_not_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_bfrev_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x58,0x02,0x7e,0x00,0x01,0x09,0xa1] v_bfrev_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ffbh_u32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ffbh_u32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ffbl_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ffbl_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ffbh_i32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ffbh_i32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_frexp_exp_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x66,0x02,0x7e,0x00,0x01,0x09,0xa1] v_frexp_exp_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_frexp_mant_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x68,0x02,0x7e,0x00,0x01,0x09,0xa1] v_frexp_mant_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_log_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x98,0x02,0x7e,0x00,0x01,0x09,0xa1] v_log_legacy_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_exp_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x96,0x02,0x7e,0x00,0x01,0x09,0xa1] v_exp_legacy_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f16_u16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x72,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f16_u16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_f16_i16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x74,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_f16_i16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_cvt_u16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x76,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_u16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_cvt_i16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x78,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cvt_i16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rcp_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rcp_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_sqrt_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_sqrt_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rsq_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rsq_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_log_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x80,0x02,0x7e,0x00,0x01,0x09,0xa1] v_log_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_exp_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x82,0x02,0x7e,0x00,0x01,0x09,0xa1] v_exp_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_frexp_mant_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x84,0x02,0x7e,0x00,0x01,0x09,0xa1] v_frexp_mant_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_frexp_exp_i16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x86,0x02,0x7e,0x00,0x01,0x09,0xa1] v_frexp_exp_i16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_floor_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x88,0x02,0x7e,0x00,0x01,0x09,0xa1] v_floor_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ceil_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8a,0x02,0x7e,0x00,0x01,0x09,0xa1] v_ceil_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_trunc_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8c,0x02,0x7e,0x00,0x01,0x09,0xa1] v_trunc_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_rndne_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8e,0x02,0x7e,0x00,0x01,0x09,0xa1] v_rndne_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_fract_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x90,0x02,0x7e,0x00,0x01,0x09,0xa1] v_fract_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_sin_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x92,0x02,0x7e,0x00,0x01,0x09,0xa1] v_sin_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_cos_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x94,0x02,0x7e,0x00,0x01,0x09,0xa1] v_cos_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // GFX9: v_cvt_norm_i16_f16_dpp v5, |v1| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x9a,0x0a,0x7e,0x01,0xe4,0x20,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_cvt_norm_i16_f16_dpp v5, |v1| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX9: v_cvt_norm_u16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x9c,0x0a,0x7e,0x01,0x1b,0x00,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_cvt_norm_u16_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // GFX9: v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x9e,0x0a,0x7e,0x01,0x2f,0x01,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0x0 bank_mask:0x0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 ; encoding: [0xfa,0x6e,0x0a,0x7e,0x01,0xe4,0x08,0x00] v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 @@ -359,239 +361,239 @@ v_screen_partition_4se_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask //===----------------------------------------------------------------------===// // ToDo: VOP2bInst instructions: v_add_u32, v_sub_u32 ... (vcc and ApplyMnemonic in AsmMatcherEmitter.cpp) -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_mac_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x01,0x01,0xff] v_mac_f32 v0, v0, v0 row_shl:1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mac_f32_dpp v0, v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x1f,0x01,0xff] v_mac_f32 v0, v0, v0 row_shr:0xf -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mac_f32_dpp v0, v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x4d,0x08,0xaf] v_mac_f32 v0, v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_add_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x09,0xa1] v_add_f32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_min_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x14,0x00,0x01,0x09,0xa1] v_min_f32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_and_b32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x26,0x00,0x01,0x09,0xa1] v_and_b32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_i32_i24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0c,0x02,0x01,0x09,0xa1] v_mul_i32_i24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_sub_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x04,0x02,0x01,0x09,0xa1] v_sub_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_subrev_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x06,0x02,0x01,0x09,0xa1] v_subrev_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0a,0x02,0x01,0x09,0xa1] v_mul_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_hi_i32_i24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0e,0x02,0x01,0x09,0xa1] v_mul_hi_i32_i24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_u32_u24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x10,0x02,0x01,0x09,0xa1] v_mul_u32_u24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_hi_u32_u24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x12,0x02,0x01,0x09,0xa1] v_mul_hi_u32_u24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_max_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x16,0x02,0x01,0x09,0xa1] v_max_f32 v1, v2 v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_min_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x18,0x02,0x01,0x09,0xa1] v_min_i32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_max_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1a,0x02,0x01,0x09,0xa1] v_max_i32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_min_u32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1c,0x02,0x01,0x09,0xa1] v_min_u32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_max_u32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1e,0x02,0x01,0x09,0xa1] v_max_u32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_lshrrev_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x20,0x02,0x01,0x09,0xa1] v_lshrrev_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ashrrev_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x22,0x02,0x01,0x09,0xa1] v_ashrrev_i32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_lshlrev_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x24,0x02,0x01,0x09,0xa1] v_lshlrev_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_or_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x28,0x02,0x01,0x09,0xa1] v_or_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_xor_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x2a,0x02,0x01,0x09,0xa1] v_xor_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_add_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3e,0x02,0x01,0x09,0xa1] v_add_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_sub_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x40,0x02,0x01,0x09,0xa1] v_sub_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_subrev_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x42,0x02,0x01,0x09,0xa1] v_subrev_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x44,0x02,0x01,0x09,0xa1] v_mul_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mac_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x46,0x02,0x01,0x09,0xa1] v_mac_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_add_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x4c,0x02,0x01,0x09,0xa1] v_add_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_sub_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x4e,0x02,0x01,0x09,0xa1] v_sub_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_subrev_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x50,0x02,0x01,0x09,0xa1] v_subrev_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_mul_lo_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x52,0x02,0x01,0x09,0xa1] v_mul_lo_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_lshlrev_b16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x54,0x02,0x01,0x09,0xa1] v_lshlrev_b16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_lshrrev_b16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x56,0x02,0x01,0x09,0xa1] v_lshrrev_b16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ashrrev_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x58,0x02,0x01,0x09,0xa1] v_ashrrev_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI9: v_max_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5a,0x02,0x01,0x09,0xa1] v_max_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_min_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5c,0x02,0x01,0x09,0xa1] v_min_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_max_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5e,0x02,0x01,0x09,0xa1] v_max_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_max_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x60,0x02,0x01,0x09,0xa1] v_max_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_min_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x62,0x02,0x01,0x09,0xa1] v_min_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_min_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x64,0x02,0x01,0x09,0xa1] v_min_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI9: v_ldexp_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x66,0x02,0x01,0x09,0xa1] v_ldexp_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. 
// VI: v_add_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1] v_add_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. // VI: v_sub_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1] v_sub_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. // VI: v_subrev_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1] v_subrev_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. // VI: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1] v_addc_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. // VI: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1] v_subb_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: not a valid operand. 
// VI: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1] v_subbrev_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_add_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1] v_add_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_sub_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1] v_sub_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_subrev_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1] v_subrev_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_addc_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1] v_addc_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. 
// GFX9: v_subb_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1] v_subb_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_subbrev_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1] v_subbrev_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 -// NOSICI: error +// NOSICI: error: not a valid operand. // VI9: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00] v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// NOSICI: error +// NOSICI: error: not a valid operand. // VI9: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00] v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 diff --git a/llvm/test/MC/AMDGPU/vop_sdwa.s b/llvm/test/MC/AMDGPU/vop_sdwa.s index a0c71253df810..88386e046917f 100644 --- a/llvm/test/MC/AMDGPU/vop_sdwa.s +++ b/llvm/test/MC/AMDGPU/vop_sdwa.s @@ -1,41 +1,41 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=GFX89 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=GFX89 -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI -// RUN: not llvm-mc 
-arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX89 -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --check-prefix=NOGFX89 +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOCI,NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX89 --implicit-check-not=error: +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --check-prefix=NOGFX89 --implicit-check-not=error: //---------------------------------------------------------------------------// // Check SDWA operands //---------------------------------------------------------------------------// -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x02,0x10,0x06,0x00] v_mov_b32 v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mov_b32_sdwa v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x06,0x7e,0x04,0x11,0x05,0x00] v_mov_b32 v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mov_b32_sdwa v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0 ; encoding: [0xf9,0x02,0x1e,0x7e,0x63,0x0a,0x04,0x00] v_mov_b32 v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_min_u32_sdwa v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 ; encoding: [0xf9,0x02,0x84,0x1d,0x0d,0x0b,0x03,0x02] v_min_u32 v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_u32_sdwa v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1 ; encoding: [0xf9,0x02,0xfe,0x1d,0x04,0x04,0x02,0x05] v_min_u32 v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_u32_sdwa v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x90,0x1d,0xc8,0x05,0x01,0x06] v_min_u32 v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_u32_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x1c,0x01,0x06,0x00,0x06] v_min_u32 v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -43,43 +43,43 @@ v_min_u32 v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_se // Check optional operands //---------------------------------------------------------------------------// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_u32_f32_sdwa v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x0e,0x00,0x7e,0x00,0x36,0x06,0x00] v_cvt_u32_f32 v0, v0 clamp dst_sel:DWORD -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_fract_f32_sdwa v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x36,0x00,0x7e,0x00,0x26,0x06,0x00] v_fract_f32 v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD -// NOSICI: error: +// NOSICI: error: invalid operand for 
instruction // GFX89: v_sin_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x52,0x00,0x7e,0x00,0x06,0x05,0x00] v_sin_f32 v0, v0 dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mov_b32_sdwa v1, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x36,0x05,0x00] v_mov_b32 v1, v0 clamp src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_trunc_f32_sdwa v1, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x38,0x02,0x7e,0x00,0x36,0x05,0x00] v_trunc_f32 v1, v0 clamp dst_sel:DWORD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_mov_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x16,0x06,0x00] v_mov_b32_sdwa v1, v0 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_add_f32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x05,0x06] v_add_f32_sdwa v0, v0, v0 dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_min_f32_sdwa v0, v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x36,0x06,0x02] v_min_f32 v0, v0, v0 clamp dst_sel:DWORD src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_and_b32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x06,0x02] v_and_b32 v0, v0, v0 dst_unused:UNUSED_PAD src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_mul_i32_i24_sdwa v1, v2, v3 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x06,0x02,0x0c,0x02,0x36,0x06,0x06] v_mul_i32_i24_sdwa v1, v2, v3 clamp @@ -87,31 +87,31 @@ v_mul_i32_i24_sdwa v1, v2, v3 clamp // Check modifiers //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_fract_f32_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x36,0x00,0x7e,0x00,0x06,0x25,0x00] v_fract_f32 v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_sin_f32_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x52,0x00,0x7e,0x00,0x06,0x35,0x00] v_sin_f32 v0, -abs(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_add_f32_sdwa v0, -|v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x35,0x12] v_add_f32 v0, -|v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_min_f32_sdwa v0, |v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x06,0x25,0x12] v_min_f32 v0, abs(v0), -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_mov_b32_sdwa v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x16,0x0e,0x00] v_mov_b32_sdwa v1, sext(v0) -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_and_b32_sdwa v0, sext(v0), sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x0e,0x0a] v_and_b32 v0, sext(v0), sext(v0) dst_unused:UNUSED_PAD src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmp_class_f32 vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x12,0x0c] // GFX9: v_cmp_class_f32_sdwa vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x12,0x0c] v_cmp_class_f32_sdwa vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 @@ -120,477 +120,479 @@ v_cmp_class_f32_sdwa vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 // Check VOP1 opcodes //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: instruction not supported on this GPU // GFX89: v_nop ; encoding: [0xf9,0x00,0x00,0x7e,0x00,0x00,0x00,0x00] v_nop_sdwa -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_u32_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x0e,0x00,0x7e,0x00,0x06,0x05,0x00] v_cvt_u32_f32 v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_fract_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x36,0x00,0x7e,0x00,0x06,0x05,0x00] v_fract_f32 v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_sin_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x52,0x00,0x7e,0x00,0x06,0x05,0x00] 
v_sin_f32 v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mov_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x06,0x05,0x00] v_mov_b32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_i32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x0a,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_i32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x0c,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_u32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x10,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_i32_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f16_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x14,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f16_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x16,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_rpi_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x18,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_rpi_i32_f32 v1, 
v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_flr_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x1a,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_flr_i32_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_off_f32_i4_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x1c,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_off_f32_i4 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_ubyte0_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x22,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_ubyte0 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_ubyte1_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x24,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_ubyte1 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_ubyte2_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x26,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_ubyte2 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cvt_f32_ubyte3_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x28,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f32_ubyte3 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_trunc_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: 
[0xf9,0x38,0x02,0x7e,0x00,0x06,0x05,0x00] v_trunc_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_ceil_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x3a,0x02,0x7e,0x00,0x06,0x05,0x00] v_ceil_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_rndne_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x3c,0x02,0x7e,0x00,0x06,0x05,0x00] v_rndne_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_floor_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x3e,0x02,0x7e,0x00,0x06,0x05,0x00] v_floor_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_exp_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x40,0x02,0x7e,0x00,0x06,0x05,0x00] v_exp_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_log_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x42,0x02,0x7e,0x00,0x06,0x05,0x00] v_log_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_rcp_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x44,0x02,0x7e,0x00,0x06,0x05,0x00] v_rcp_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_rcp_iflag_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x46,0x02,0x7e,0x00,0x06,0x05,0x00] 
v_rcp_iflag_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_rsq_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x48,0x02,0x7e,0x00,0x06,0x05,0x00] v_rsq_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_sqrt_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x4e,0x02,0x7e,0x00,0x06,0x05,0x00] v_sqrt_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_cos_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x54,0x02,0x7e,0x00,0x06,0x05,0x00] v_cos_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_not_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x56,0x02,0x7e,0x00,0x06,0x05,0x00] v_not_b32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_bfrev_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x58,0x02,0x7e,0x00,0x06,0x05,0x00] v_bfrev_b32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_ffbh_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x5a,0x02,0x7e,0x00,0x06,0x05,0x00] v_ffbh_u32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_ffbl_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x5c,0x02,0x7e,0x00,0x06,0x05,0x00] v_ffbl_b32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_ffbh_i32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x5e,0x02,0x7e,0x00,0x06,0x05,0x00] v_ffbh_i32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_frexp_exp_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x66,0x02,0x7e,0x00,0x06,0x05,0x00] v_frexp_exp_i32_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_frexp_mant_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x68,0x02,0x7e,0x00,0x06,0x05,0x00] v_frexp_mant_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: // GFX89: v_log_legacy_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x98,0x02,0x7e,0x00,0x06,0x05,0x00] +// NOSI: error: not a valid operand. +// NOCI: error: invalid operand for instruction v_log_legacy_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: // GFX89: v_exp_legacy_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x96,0x02,0x7e,0x00,0x06,0x05,0x00] +// NOSI: error: not a valid operand. +// NOCI: error: invalid operand for instruction v_exp_legacy_f32 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_cvt_f16_u16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x72,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f16_u16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_cvt_f16_i16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x74,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_f16_i16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_cvt_u16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x76,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_u16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_cvt_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x78,0x02,0x7e,0x00,0x06,0x05,0x00] v_cvt_i16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_rcp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7a,0x02,0x7e,0x00,0x06,0x05,0x00] v_rcp_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_sqrt_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7c,0x02,0x7e,0x00,0x06,0x05,0x00] v_sqrt_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_rsq_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7e,0x02,0x7e,0x00,0x06,0x05,0x00] v_rsq_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x80,0x02,0x7e,0x00,0x06,0x05,0x00] v_log_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_exp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x82,0x02,0x7e,0x00,0x06,0x05,0x00] v_exp_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_frexp_mant_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x84,0x02,0x7e,0x00,0x06,0x05,0x00] v_frexp_mant_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_frexp_exp_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x86,0x02,0x7e,0x00,0x06,0x05,0x00] v_frexp_exp_i16_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_floor_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x88,0x02,0x7e,0x00,0x06,0x05,0x00] v_floor_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_ceil_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8a,0x02,0x7e,0x00,0x06,0x05,0x00] v_ceil_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_trunc_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8c,0x02,0x7e,0x00,0x06,0x05,0x00] v_trunc_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8e,0x02,0x7e,0x00,0x06,0x05,0x00] v_rndne_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_fract_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x90,0x02,0x7e,0x00,0x06,0x05,0x00] v_fract_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_sin_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x92,0x02,0x7e,0x00,0x06,0x05,0x00] v_sin_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_cos_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x94,0x02,0x7e,0x00,0x06,0x05,0x00] v_cos_f16 v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 // GFX9: v_cvt_norm_i16_f16_sdwa v5, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x9a,0x0a,0x7e,0x01,0x06,0x16,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_cvt_norm_i16_f16_sdwa v5, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // GFX9: v_cvt_norm_i16_f16_sdwa v5, |v1| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x9a,0x0a,0x7e,0x01,0x06,0x26,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_cvt_norm_i16_f16_sdwa v5, |v1| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // GFX9: v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x9c,0x0a,0x7e,0x01,0x16,0x06,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. 
v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD // GFX9: v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x9c,0x0a,0x7e,0x01,0x06,0x05,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_cvt_norm_u16_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 // GFX9: v_sat_pk_u8_i16_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x9e,0x0a,0x7e,0x01,0x06,0x0e,0x00] -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. v_sat_pk_u8_i16_sdwa v5, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD //===----------------------------------------------------------------------===// // Check VOP2 opcodes //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_add_f32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x05,0x02] v_add_f32 v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_min_f32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x06,0x05,0x02] v_min_f32 v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_and_b32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x05,0x02] v_and_b32 v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_mul_i32_i24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x0c,0x02,0x06,0x05,0x02] v_mul_i32_i24 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_sub_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x04,0x02,0x06,0x05,0x02] v_sub_f32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_subrev_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x06,0x02,0x06,0x05,0x02] v_subrev_f32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_mul_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x0a,0x02,0x06,0x05,0x02] v_mul_f32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_mul_hi_i32_i24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x0e,0x02,0x06,0x05,0x02] v_mul_hi_i32_i24 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_mul_u32_u24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x10,0x02,0x06,0x05,0x02] v_mul_u32_u24 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mul_hi_u32_u24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x12,0x02,0x06,0x05,0x02] v_mul_hi_u32_u24 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // GFX89: v_max_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x16,0x02,0x06,0x05,0x02] v_max_f32 v1, v2 v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_i32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x18,0x02,0x06,0x05,0x02] v_min_i32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_max_i32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x1a,0x02,0x06,0x05,0x02] v_max_i32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_min_u32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x1c,0x02,0x06,0x05,0x02] v_min_u32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_max_u32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x1e,0x02,0x06,0x05,0x02] v_max_u32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_lshrrev_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x20,0x02,0x06,0x05,0x02] v_lshrrev_b32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_ashrrev_i32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x22,0x02,0x06,0x05,0x02] v_ashrrev_i32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_lshlrev_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x24,0x02,0x06,0x05,0x02] v_lshlrev_b32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x28,0x02,0x06,0x05,0x02] v_or_b32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_xor_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x2a,0x02,0x06,0x05,0x02] v_xor_b32 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_add_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3e,0x02,0x06,0x05,0x02] v_add_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_sub_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x40,0x02,0x06,0x05,0x02] v_sub_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_subrev_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x42,0x02,0x06,0x05,0x02] v_subrev_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mul_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x44,0x02,0x06,0x05,0x02] v_mul_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_add_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x4c,0x02,0x06,0x05,0x02] v_add_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_sub_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x4e,0x02,0x06,0x05,0x02] v_sub_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_subrev_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x50,0x02,0x06,0x05,0x02] v_subrev_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_mul_lo_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x52,0x02,0x06,0x05,0x02] v_mul_lo_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_lshlrev_b16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x54,0x02,0x06,0x05,0x02] v_lshlrev_b16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_lshrrev_b16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x56,0x02,0x06,0x05,0x02] v_lshrrev_b16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_ashrrev_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x58,0x02,0x06,0x05,0x02] v_ashrrev_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_max_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5a,0x02,0x06,0x05,0x02] v_max_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5c,0x02,0x06,0x05,0x02] v_min_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_max_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5e,0x02,0x06,0x05,0x02] v_max_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_max_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x60,0x02,0x06,0x05,0x02] v_max_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x62,0x02,0x06,0x05,0x02] v_min_u16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. // GFX89: v_min_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x64,0x02,0x06,0x05,0x02] v_min_i16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// GFX89: v_ldexp_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x66,0x02,0x06,0x05,0x02] v_ldexp_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: instruction not supported on this GPU // VI: v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02] v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: instruction not supported on this GPU // VI: v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02] v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: not a valid operand. +// NOGFX9: error: instruction not supported on this GPU // VI: v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02] v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: invalid operand for instruction +// NOGFX9: error: not a valid operand. // VI: v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02] v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: invalid operand for instruction +// NOGFX9: error: not a valid operand. 
// VI: v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02] v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOGFX9: error: +// NOSICI: error: invalid operand for instruction +// NOGFX9: error: not a valid operand. // VI: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02] v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: not a valid operand. // GFX9: v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02] v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: not a valid operand. // GFX9: v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02] v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: not a valid operand. // GFX9: v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02] v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. 
// GFX9: v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02] v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02] v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02] v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error +// NOSICI: error: not a valid operand. // GFX89: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06] v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error -// NOVI: error +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06] v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error +// NOSICI: error: not a valid operand. 
// GFX89: v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e] v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD @@ -603,72 +605,72 @@ v_cndmask_b32_sdwa v5, vcc_lo, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE // Check VOPC opcodes //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_cmp_eq_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmp_eq_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x02,0x04] v_cmp_eq_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_cmp_nle_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmp_nle_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7c,0x01,0x00,0x02,0x04] v_cmp_nle_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_cmpx_gt_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa8,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_gt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa8,0x7c,0x01,0x00,0x02,0x04] v_cmpx_gt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_cmpx_nlt_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xbc,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_nlt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xbc,0x7c,0x01,0x00,0x02,0x04] v_cmpx_nlt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// 
NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmp_lt_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x82,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_lt_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x82,0x7d,0x01,0x00,0x02,0x04] v_cmp_lt_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmp_t_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x8e,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_t_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x8e,0x7d,0x01,0x00,0x02,0x04] v_cmp_t_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmpx_eq_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa4,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_eq_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa4,0x7d,0x01,0x00,0x02,0x04] v_cmpx_eq_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmpx_ne_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xaa,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_ne_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xaa,0x7d,0x01,0x00,0x02,0x04] v_cmpx_ne_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmp_f_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x90,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_f_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x90,0x7d,0x01,0x00,0x02,0x04] v_cmp_f_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI: v_cmp_gt_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmp_gt_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7d,0x01,0x00,0x02,0x04] v_cmp_gt_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmpx_le_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xb6,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_le_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xb6,0x7d,0x01,0x00,0x02,0x04] v_cmpx_le_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmpx_ne_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xba,0x7d,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_ne_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xba,0x7d,0x01,0x00,0x02,0x04] v_cmpx_ne_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // VI: v_cmp_class_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmp_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x02,0x04] v_cmp_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI: v_cmpx_class_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x22,0x7c,0x01,0x00,0x02,0x04] // GFX9: v_cmpx_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x22,0x7c,0x01,0x00,0x02,0x04] v_cmpx_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 @@ -681,22 +683,22 @@ v_cmpx_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 // v_mac_f16/f32 is prohibited //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_mac_f32_sdwa v3, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x0a,0x06,0x2c,0x04,0x16,0x05,0x06] // NOGFX9: error: instruction not supported on this GPU v_mac_f32 v3, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // VI: v_mac_f32_sdwa v15, v99, v194 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:WORD_0 src1_sel:DWORD ; encoding: [0xf9,0x84,0x1f,0x2c,0x63,0x0e,0x04,0x06] // NOGFX9: error: instruction not supported on this GPU v_mac_f32 v15, v99, v194 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // NOVI: error: invalid operand for instruction // NOGFX9: error: instruction not supported on this GPU v_mac_f32 v194, v13, v1 dst_sel:BYTE_0 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 -// NOSICI: error: +// NOSICI: error: not a valid operand. 
// VI: v_mac_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x46,0x02,0x06,0x05,0x02] // NOGFX9: error: instruction not supported on this GPU v_mac_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 @@ -705,312 +707,318 @@ v_mac_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_se // Scalar registers are allowed //===----------------------------------------------------------------------===// -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v1, s2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x02,0x10,0x86,0x00] v_mov_b32 v1, s2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v1, exec_lo dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x7e,0x10,0x86,0x00] v_mov_b32 v1, exec_lo dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. 
// GFX9: v_mov_b32_sdwa v1, ttmp12 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x78,0x10,0x86,0x00] v_mov_b32_sdwa v1, ttmp12 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x85,0x02] v_add_f32 v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v0, v0, s22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x2c,0x00,0x02,0x00,0x06,0x05,0x82] v_add_f32 v0, v0, s22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +// NOGFX9: error: invalid operand for instruction // NO: invalid operand (violates constant bus restrictions) v_add_f32 v0, exec_lo, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +// NOGFX9: error: not a valid operand. // NO: error: not a valid operand v_add_f32 v0, v1, tba_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction +// NOGFX9: error: not a valid operand. 
// NO: error: not a valid operand v_add_f32 v0, v1, tma_hi dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa vcc, s1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x85,0x02] v_cmp_eq_f32_sdwa vcc, s1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa vcc, v1, s22 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x2c,0x84,0x7c,0x01,0x00,0x05,0x82] v_cmp_eq_f32_sdwa vcc, v1, s22 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_cmp_eq_f32_sdwa ttmp[12:13], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0xf8,0x05,0x02] v_cmp_eq_f32_sdwa ttmp[12:13], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU +// NOGFX9: error: not a valid operand. // NO: error: not a valid operand v_cmp_eq_f32_sdwa tba, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU +// NOGFX9: error: not a valid operand. // NO: error: not a valid operand v_cmp_eq_f32_sdwa tma, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. 
// GFX9: v_cmp_eq_f32_sdwa vcc, v1, ttmp15 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0xf6,0x84,0x7c,0x01,0x00,0x05,0x82] v_cmp_eq_f32_sdwa vcc, v1, ttmp15 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32_sdwa vcc, exec_lo, vcc_lo src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOVI: error: invalid operand for instruction // GFX9: v_ceil_f16_sdwa v5, flat_scratch_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0x66,0x06,0x86,0x00] +// NOSI: error: not a valid operand. +// NOCI: error: not a valid operand. v_ceil_f16_sdwa v5, flat_scratch_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD //===----------------------------------------------------------------------===// // Inline constants are allowed (though semantics is not clear yet) //===----------------------------------------------------------------------===// -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0x80,0x06,0x86,0x00] v_mov_b32_sdwa v5, 0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xc1,0x06,0x86,0x00] v_mov_b32_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. 
+// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xf0,0x06,0x86,0x00] v_mov_b32_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, -4.0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xf7,0x06,0x86,0x00] v_mov_b32_sdwa v5, -4.0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_mov_b32_sdwa v5, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x0a,0x7e,0xc1,0x16,0x8e,0x00] v_mov_b32_sdwa v5, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, -1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xc1,0x06,0x86,0x06] v_add_f32_sdwa v5, -1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, |-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xc1,0x16,0xa6,0x06] v_add_f32_sdwa v5, |-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, neg(-1), -|v2| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: 
[0xf9,0x04,0x0a,0x02,0xc1,0x16,0x96,0x36] v_add_f32_sdwa v5, neg(-1), -|v2| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, -|-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xc1,0x16,0xb6,0x06] v_add_f32_sdwa v5, -|-1|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, 0.5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xf0,0x06,0x86,0x06] v_add_f32_sdwa v5, 0.5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, |-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xf7,0x16,0xa6,0x06] v_add_f32_sdwa v5, |-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, neg(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0xf7,0x16,0x96,0x06] v_add_f32_sdwa v5, neg(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, -|-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: 
[0xf9,0x04,0x0a,0x02,0xf7,0x16,0xb6,0x06] v_add_f32_sdwa v5, -|-4.0|, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -4.0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0x86] v_add_f32_sdwa v5, v2, -4.0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, |-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0xa6] v_add_f32_sdwa v5, v2, |-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, neg(-4.0) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0x96] v_add_f32_sdwa v5, v2, neg(-4.0) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -|-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xee,0x0b,0x02,0x02,0x16,0x06,0xb6] v_add_f32_sdwa v5, v2, -|-4.0| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; 
encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0x86] v_add_f32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, |-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0xa6] v_add_f32_sdwa v5, v2, |-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, neg(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0x96] v_add_f32_sdwa v5, v2, neg(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_add_f32_sdwa v5, v2, -|-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x02,0x02,0x16,0x06,0xb6] v_add_f32_sdwa v5, v2, -|-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x26,0xf7,0x16,0x86,0x06] v_and_b32_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. 
+// NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x26,0xf7,0x16,0x8e,0x06] v_and_b32_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x26,0x02,0x16,0x06,0x86] v_and_b32_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_and_b32_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x26,0x02,0x16,0x06,0x8e] v_and_b32_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0x86,0x00] v_exp_f16_sdwa v5, -1 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, |-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0xa6,0x00] v_exp_f16_sdwa v5, |-1| -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. 
+// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, neg(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0x96,0x00] v_exp_f16_sdwa v5, neg(-1) -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, -|-1| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xc1,0x16,0xb6,0x00] v_exp_f16_sdwa v5, -|-1| -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, 0.5 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0x86,0x00] v_exp_f16_sdwa v5, 0.5 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, |0.5| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0xa6,0x00] v_exp_f16_sdwa v5, |0.5| -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, neg(0.5) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0x96,0x00] v_exp_f16_sdwa v5, neg(0.5) -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_exp_f16_sdwa v5, -|0.5| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x82,0x0a,0x7e,0xf0,0x16,0xb6,0x00] v_exp_f16_sdwa v5, -|0.5| -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. 
+// NOVI: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_max_i16_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // NOGFX9: error: invalid operand for instruction v_max_i16_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_max_i16_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x60,0x02,0x16,0x06,0x86] v_max_i16_sdwa v5, v2, -1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: invalid operand for instruction // GFX9: v_max_i16_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x0b,0x60,0x02,0x16,0x06,0x8e] v_max_i16_sdwa v5, v2, sext(-1) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], -4.0, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0x86,0x06] v_cmp_eq_f32_sdwa s[6:7], -4.0, v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], |-4.0|, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0xa6,0x06] v_cmp_eq_f32_sdwa s[6:7], |-4.0|, v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for 
instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], neg(-4.0), v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0x96,0x06] v_cmp_eq_f32_sdwa s[6:7], neg(-4.0), v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], -|-4.0|, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x84,0x7c,0xf7,0x86,0xb6,0x06] v_cmp_eq_f32_sdwa s[6:7], -|-4.0|, v2 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, -1 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0x86] v_cmp_eq_f32_sdwa s[6:7], v2, -1 src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, |-1| src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0xa6] v_cmp_eq_f32_sdwa s[6:7], v2, |-1| src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, neg(-1) src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0x96] v_cmp_eq_f32_sdwa s[6:7], v2, neg(-1) src0_sel:DWORD src1_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa s[6:7], v2, -|-1| src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x82,0x85,0x7c,0x02,0x86,0x06,0xb6] v_cmp_eq_f32_sdwa s[6:7], v2, -|-1| src0_sel:DWORD src1_sel:DWORD @@ -1018,19 +1026,19 @@ v_cmp_eq_f32_sdwa s[6:7], v2, -|-1| src0_sel:DWORD 
src1_sel:DWORD // Literals are not allowed //===----------------------------------------------------------------------===// -// NOSICI: error: +// NOSICI: error: invalid operand for instruction // NOGFX89: error: invalid operand for instruction v_add_f32 v0, v1, 3.45 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // NOGFX89: error: invalid operand for instruction v_cmpx_class_f32 vcc, v1, 200 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // NOGFX89: error: invalid operand for instruction v_cmpx_class_f32 vcc, 200, v1 src0_sel:BYTE_2 src1_sel:WORD_0 -// NOSICI: error: +// NOSICI: error: not a valid operand. // NOGFX89: error: invalid operand for instruction v_mov_b32_sdwa v5, -17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD @@ -1038,18 +1046,18 @@ v_mov_b32_sdwa v5, -17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD // VOPC with arbitrary SGPR destination //===----------------------------------------------------------------------===// -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_cmp_eq_f32_sdwa s[2:3], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x82,0x05,0x02] v_cmp_eq_f32_sdwa s[2:3], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_cmp_eq_f32_sdwa exec, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0xfe,0x05,0x02] v_cmp_eq_f32_sdwa exec, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: invalid operand for instruction // GFX9: v_cmp_eq_f32_sdwa exec, s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x02,0xfe,0x85,0x02] v_cmp_eq_f32_sdwa exec, 
s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 @@ -1057,23 +1065,23 @@ v_cmp_eq_f32_sdwa exec, s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 // OMod output modifier allowed //===----------------------------------------------------------------------===// -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_trunc_f32_sdwa v1, v2 mul:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x38,0x02,0x7e,0x02,0x50,0x06,0x00] v_trunc_f32 v1, v2 mul:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_trunc_f32_sdwa v1, v2 clamp div:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x38,0x02,0x7e,0x02,0xf0,0x06,0x00] v_trunc_f32 v1, v2 clamp div:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_add_f32_sdwa v0, v0, v0 mul:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x46,0x05,0x02] v_add_f32 v0, v0, v0 mul:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 -// NOSICI: error: -// NOVI: error: +// NOSICI: error: invalid operand for instruction +// NOVI: error: instruction not supported on this GPU // GFX9: v_add_f32_sdwa v0, v0, v0 clamp div:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0xe6,0x05,0x02] v_add_f32 v0, v0, v0 clamp div:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 @@ -1081,8 +1089,8 @@ v_add_f32 v0, v0, v0 clamp div:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WO // Check Instructions 
//---------------------------------------------------------------------------// -// NOSICI: error: -// NOVI: error: +// NOSICI: error: not a valid operand. +// NOVI: error: not a valid operand. // GFX9: v_screen_partition_4se_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:BYTE_0 ; encoding: [0xf9,0x6e,0x0a,0x7e,0x01,0x16,0x00,0x00] v_screen_partition_4se_b32_sdwa v5, v1 src0_sel:BYTE_0 diff --git a/llvm/test/MC/AMDGPU/vopc-errs.s b/llvm/test/MC/AMDGPU/vopc-errs.s index bc8902f051ad7..4998aebe0b04b 100644 --- a/llvm/test/MC/AMDGPU/vopc-errs.s +++ b/llvm/test/MC/AMDGPU/vopc-errs.s @@ -1,6 +1,6 @@ -// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck --implicit-check-not=error: %s // Force 32-bit encoding with non-vcc result diff --git a/llvm/test/MC/AMDGPU/vopc-vi.s b/llvm/test/MC/AMDGPU/vopc-vi.s index f79923dfbd2ec..f4c796528200f 100644 --- a/llvm/test/MC/AMDGPU/vopc-vi.s +++ b/llvm/test/MC/AMDGPU/vopc-vi.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICI %s - // RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICI --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefix=NOSICI --implicit-check-not=error: %s v_cmp_class_f16 vcc, v2, v4 // VI: v_cmp_class_f16_e32 vcc, v2, v4 
; encoding: [0x02,0x09,0x28,0x7c] diff --git a/llvm/test/MC/AMDGPU/wave32.s b/llvm/test/MC/AMDGPU/wave32.s index b9532aebd1579..b9f6af4b28169 100644 --- a/llvm/test/MC/AMDGPU/wave32.s +++ b/llvm/test/MC/AMDGPU/wave32.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1032 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1064 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX1032-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX1064-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck -check-prefix=GFX1032-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck -check-prefix=GFX1064-ERR --implicit-check-not=error: %s v_cmp_ge_i32_e32 s0, v0 // GFX1032: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d] diff --git a/llvm/test/MC/AMDGPU/xdl-insts-err.s b/llvm/test/MC/AMDGPU/xdl-insts-err.s index 8f596bea7aad0..d774260bf941c 100644 --- a/llvm/test/MC/AMDGPU/xdl-insts-err.s +++ b/llvm/test/MC/AMDGPU/xdl-insts-err.s @@ -1,5 +1,5 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX906-ERR %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX908-ERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX906-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX908-ERR --implicit-check-not=error: %s 
// GFX906-ERR: error: instruction not supported on this GPU v_dot2c_f32_f16 v0, v1, v2 diff --git a/llvm/test/MC/AMDGPU/xnack-mask.s b/llvm/test/MC/AMDGPU/xnack-mask.s index c88a8c2985070..0fa5242d37899 100644 --- a/llvm/test/MC/AMDGPU/xnack-mask.s +++ b/llvm/test/MC/AMDGPU/xnack-mask.s @@ -1,9 +1,9 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1001 -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1001 %s 2>&1 | FileCheck -check-prefix=NOSICIVI10 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=stoney -show-encoding %s 2>&1 | FileCheck -check-prefix=XNACKERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=stoney %s 2>&1 | FileCheck -check-prefix=XNACKERR --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=stoney -show-encoding %s | FileCheck -check-prefix=XNACK %s s_mov_b64 xnack_mask, -1 From 92a541978618674ce112b2f500853218fed24db8 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Mon, 31 Aug 2020 12:22:24 -0700 Subject: [PATCH 023/465] [flang] Support multiple CookedSource instances These are owned by an instance of a new class AllCookedSources. 
This removes the need for a Scope to own a string containing a module's cooked source stream, and will enable errors to be emitted when parsing module files in the future. Differential Revision: https://reviews.llvm.org/D86891 --- flang/include/flang/Lower/Bridge.h | 12 +- flang/include/flang/Lower/ConvertType.h | 5 - .../flang/Parser/instrumented-parser.h | 2 +- flang/include/flang/Parser/message.h | 16 +-- flang/include/flang/Parser/parsing.h | 13 ++- flang/include/flang/Parser/provenance.h | 61 +++++++--- flang/include/flang/Parser/user-state.h | 11 +- flang/include/flang/Semantics/scope.h | 5 - flang/include/flang/Semantics/semantics.h | 13 +-- flang/lib/Parser/debug-parser.cpp | 4 +- flang/lib/Parser/instrumented-parser.cpp | 7 +- flang/lib/Parser/message.cpp | 28 ++--- flang/lib/Parser/parsing.cpp | 28 ++--- flang/lib/Parser/prescan.cpp | 29 ++--- flang/lib/Parser/prescan.h | 14 +-- flang/lib/Parser/provenance.cpp | 107 +++++++++++------- flang/lib/Semantics/mod-file.cpp | 3 +- flang/lib/Semantics/scope.cpp | 8 -- flang/lib/Semantics/semantics.cpp | 9 +- flang/test/Semantics/getsymbols02.f90 | 4 +- flang/tools/f18-parse-demo/f18-parse-demo.cpp | 7 +- flang/tools/f18/f18.cpp | 15 +-- flang/unittests/Evaluate/intrinsics.cpp | 9 +- 23 files changed, 230 insertions(+), 180 deletions(-) diff --git a/flang/include/flang/Lower/Bridge.h b/flang/include/flang/Lower/Bridge.h index aee7a0ef5bd8d..ebaffaa4a6e0e 100644 --- a/flang/include/flang/Lower/Bridge.h +++ b/flang/include/flang/Lower/Bridge.h @@ -34,7 +34,7 @@ namespace evaluate { class IntrinsicProcTable; } // namespace evaluate namespace parser { -class CookedSource; +class AllCookedSources; struct Program; } // namespace parser namespace semantics { @@ -55,8 +55,8 @@ class LoweringBridge { static LoweringBridge create(const Fortran::common::IntrinsicTypeDefaultKinds &defaultKinds, const Fortran::evaluate::IntrinsicProcTable &intrinsics, - const Fortran::parser::CookedSource &cooked) { - return 
LoweringBridge{defaultKinds, intrinsics, cooked}; + const Fortran::parser::AllCookedSources &allCooked) { + return LoweringBridge{defaultKinds, intrinsics, allCooked}; } //===--------------------------------------------------------------------===// @@ -71,7 +71,7 @@ class LoweringBridge { const Fortran::evaluate::IntrinsicProcTable &getIntrinsicTable() const { return intrinsics; } - const Fortran::parser::CookedSource *getCookedSource() const { + const Fortran::parser::AllCookedSources *getCookedSource() const { return cooked; } @@ -99,13 +99,13 @@ class LoweringBridge { explicit LoweringBridge( const Fortran::common::IntrinsicTypeDefaultKinds &defaultKinds, const Fortran::evaluate::IntrinsicProcTable &intrinsics, - const Fortran::parser::CookedSource &cooked); + const Fortran::parser::AllCookedSources &); LoweringBridge() = delete; LoweringBridge(const LoweringBridge &) = delete; const Fortran::common::IntrinsicTypeDefaultKinds &defaultKinds; const Fortran::evaluate::IntrinsicProcTable &intrinsics; - const Fortran::parser::CookedSource *cooked; + const Fortran::parser::AllCookedSources *cooked; std::unique_ptr context; std::unique_ptr module; fir::KindMapping kindMap; diff --git a/flang/include/flang/Lower/ConvertType.h b/flang/include/flang/Lower/ConvertType.h index f4046efba1127..b807d62038186 100644 --- a/flang/include/flang/Lower/ConvertType.h +++ b/flang/include/flang/Lower/ConvertType.h @@ -48,11 +48,6 @@ template class Type; } // namespace evaluate -namespace parser { -class CharBlock; -class CookedSource; -} // namespace parser - namespace semantics { class Symbol; } // namespace semantics diff --git a/flang/include/flang/Parser/instrumented-parser.h b/flang/include/flang/Parser/instrumented-parser.h index 51dbd5f03c177..1bc1c526dc9f7 100644 --- a/flang/include/flang/Parser/instrumented-parser.h +++ b/flang/include/flang/Parser/instrumented-parser.h @@ -31,7 +31,7 @@ class ParsingLog { bool Fails(const char *at, const MessageFixedText &tag, ParseState &); 
void Note(const char *at, const MessageFixedText &tag, bool pass, const ParseState &); - void Dump(llvm::raw_ostream &, const CookedSource &) const; + void Dump(llvm::raw_ostream &, const AllCookedSources &) const; private: struct LogForPosition { diff --git a/flang/include/flang/Parser/message.h b/flang/include/flang/Parser/message.h index 46a72e08a237d..cd1df0a968e72 100644 --- a/flang/include/flang/Parser/message.h +++ b/flang/include/flang/Parser/message.h @@ -186,14 +186,14 @@ class Message : public common::ReferenceCounted { bool SortBefore(const Message &that) const; bool IsFatal() const; std::string ToString() const; - std::optional GetProvenanceRange(const CookedSource &) const; - void Emit(llvm::raw_ostream &, const CookedSource &, + std::optional GetProvenanceRange( + const AllCookedSources &) const; + void Emit(llvm::raw_ostream &, const AllCookedSources &, bool echoSourceLine = true) const; - // If this Message or any of its attachments locates itself via a CharBlock - // within a particular CookedSource, replace its location with the - // corresponding ProvenanceRange. - void ResolveProvenances(const CookedSource &); + // If this Message or any of its attachments locates itself via a CharBlock, + // replace its location with the corresponding ProvenanceRange. 
+ void ResolveProvenances(const AllCookedSources &); bool IsMergeable() const { return std::holds_alternative(text_); @@ -255,8 +255,8 @@ class Messages { bool Merge(const Message &); void Merge(Messages &&); void Copy(const Messages &); - void ResolveProvenances(const CookedSource &); - void Emit(llvm::raw_ostream &, const CookedSource &cooked, + void ResolveProvenances(const AllCookedSources &); + void Emit(llvm::raw_ostream &, const AllCookedSources &, bool echoSourceLines = true) const; void AttachTo(Message &); bool AnyFatalError() const; diff --git a/flang/include/flang/Parser/parsing.h b/flang/include/flang/Parser/parsing.h index 9f8bff9e1d70a..6594f97088d58 100644 --- a/flang/include/flang/Parser/parsing.h +++ b/flang/include/flang/Parser/parsing.h @@ -40,15 +40,17 @@ struct Options { class Parsing { public: - explicit Parsing(AllSources &); + explicit Parsing(AllCookedSources &); ~Parsing(); bool consumedWholeFile() const { return consumedWholeFile_; } const char *finalRestingPlace() const { return finalRestingPlace_; } - CookedSource &cooked() { return cooked_; } + AllCookedSources &allCooked() { return allCooked_; } Messages &messages() { return messages_; } std::optional &parseTree() { return parseTree_; } + const CookedSource &cooked() const { return DEREF(currentCooked_); } + const SourceFile *Prescan(const std::string &path, Options); void DumpCookedChars(llvm::raw_ostream &) const; void DumpProvenance(llvm::raw_ostream &) const; @@ -58,13 +60,14 @@ class Parsing { void EmitMessage(llvm::raw_ostream &o, const char *at, const std::string &message, bool echoSourceLine = false) const { - cooked_.allSources().EmitMessage( - o, cooked_.GetProvenanceRange(CharBlock(at)), message, echoSourceLine); + allCooked_.allSources().EmitMessage(o, + allCooked_.GetProvenanceRange(CharBlock(at)), message, echoSourceLine); } private: Options options_; - CookedSource cooked_; + AllCookedSources &allCooked_; + CookedSource *currentCooked_{nullptr}; Messages messages_; 
bool consumedWholeFile_{false}; const char *finalRestingPlace_{nullptr}; diff --git a/flang/include/flang/Parser/provenance.h b/flang/include/flang/Parser/provenance.h index b543cd7d7b4ef..52aac931e8995 100644 --- a/flang/include/flang/Parser/provenance.h +++ b/flang/include/flang/Parser/provenance.h @@ -17,6 +17,7 @@ #include "flang/Common/interval.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -213,28 +214,22 @@ class AllSources { Encoding encoding_{Encoding::UTF_8}; }; +// Represents the result of preprocessing and prescanning a single source +// file (and all its inclusions) or module file. Parsers operate within +// single instances of CookedSource. class CookedSource { public: - explicit CookedSource(AllSources &); - ~CookedSource(); - - AllSources &allSources() { return allSources_; } - const AllSources &allSources() const { return allSources_; } const std::string &data() const { return data_; } - bool IsValid(const char *p) const { + bool Contains(const char *p) const { return p >= &data_.front() && p <= &data_.back() + 1; } - bool IsValid(CharBlock range) const { - return !range.empty() && IsValid(range.begin()) && IsValid(range.end() - 1); + bool Contains(CharBlock range) const { + return !range.empty() && Contains(range.begin()) && + Contains(range.end() - 1); } - bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); } std::optional GetProvenanceRange(CharBlock) const; - std::optional GetCharBlockFromLineAndColumns( - int line, int startColumn, int endColumn) const; - std::optional> - GetSourcePositionRange(CharBlock) const; std::optional GetCharBlock(ProvenanceRange) const; // The result of a Put() is the offset that the new data @@ -256,17 +251,51 @@ class CookedSource { } std::size_t BufferedBytes() const; - void Marshal(); // marshals text into one contiguous block - void CompileProvenanceRangeToOffsetMappings(); + void Marshal(AllSources &); // marshals text into one contiguous block + void 
CompileProvenanceRangeToOffsetMappings(AllSources &); std::string AcquireData() { return std::move(data_); } llvm::raw_ostream &Dump(llvm::raw_ostream &) const; private: - AllSources &allSources_; CharBuffer buffer_; // before Marshal() std::string data_; // all of it, prescanned and preprocessed OffsetToProvenanceMappings provenanceMap_; ProvenanceRangeToOffsetMappings invertedMap_; }; + +class AllCookedSources { +public: + explicit AllCookedSources(AllSources &); + ~AllCookedSources(); + + AllSources &allSources() { return allSources_; } + const AllSources &allSources() const { return allSources_; } + + CookedSource &NewCookedSource(); + + template // const char * or CharBlock + const CookedSource *Find(A x) const { + for (const auto &c : cooked_) { + if (c.Contains(x)) { + return &c; + } + } + return nullptr; + } + + bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); } + + std::optional GetProvenanceRange(CharBlock) const; + std::optional GetCharBlockFromLineAndColumns( + int line, int startColumn, int endColumn) const; + std::optional> + GetSourcePositionRange(CharBlock) const; + std::optional GetCharBlock(ProvenanceRange) const; + void Dump(llvm::raw_ostream &) const; + +private: + AllSources &allSources_; + std::list cooked_; // owns all CookedSource instances +}; } // namespace Fortran::parser #endif // FORTRAN_PARSER_PROVENANCE_H_ diff --git a/flang/include/flang/Parser/user-state.h b/flang/include/flang/Parser/user-state.h index 75757d2f305a6..6a4cf9736f1ff 100644 --- a/flang/include/flang/Parser/user-state.h +++ b/flang/include/flang/Parser/user-state.h @@ -26,7 +26,7 @@ namespace Fortran::parser { -class CookedSource; +class AllCookedSources; class ParsingLog; class ParseState; @@ -34,10 +34,11 @@ class Success {}; // for when one must return something that's present class UserState { public: - UserState(const CookedSource &cooked, common::LanguageFeatureControl features) - : cooked_{cooked}, features_{features} {} + UserState(const 
AllCookedSources &allCooked, + common::LanguageFeatureControl features) + : allCooked_{allCooked}, features_{features} {} - const CookedSource &cooked() const { return cooked_; } + const AllCookedSources &allCooked() const { return allCooked_; } const common::LanguageFeatureControl &features() const { return features_; } llvm::raw_ostream *debugOutput() const { return debugOutput_; } @@ -89,7 +90,7 @@ class UserState { } private: - const CookedSource &cooked_; + const AllCookedSources &allCooked_; llvm::raw_ostream *debugOutput_{nullptr}; diff --git a/flang/include/flang/Semantics/scope.h b/flang/include/flang/Semantics/scope.h index 5ebe5f32eb677..853d7044f7fd5 100644 --- a/flang/include/flang/Semantics/scope.h +++ b/flang/include/flang/Semantics/scope.h @@ -187,10 +187,6 @@ class Scope { const DeclTypeSpec &MakeTypeStarType(); const DeclTypeSpec &MakeClassStarType(); - // For modules read from module files, this is the stream of characters - // that are referenced by SourceName objects. 
- void set_chars(parser::CookedSource &); - std::size_t size() const { return size_; } void set_size(std::size_t size) { size_ = size; } std::size_t alignment() const { return alignment_; } @@ -245,7 +241,6 @@ class Scope { mapType crayPointers_; std::map> submodules_; std::list declTypeSpecs_; - std::string chars_; std::optional importKind_; std::set importNames_; DerivedTypeSpec *derivedTypeSpec_{nullptr}; // dTS->scope() == this diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index 3c7ba98f66204..4c2c0e75992a4 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -30,7 +30,7 @@ class IntrinsicTypeDefaultKinds; namespace Fortran::parser { struct Name; struct Program; -class CookedSource; +class AllCookedSources; struct AssociateConstruct; struct BlockConstruct; struct CaseConstruct; @@ -60,7 +60,7 @@ using ConstructStack = std::vector; class SemanticsContext { public: SemanticsContext(const common::IntrinsicTypeDefaultKinds &, - const common::LanguageFeatureControl &, parser::AllSources &); + const common::LanguageFeatureControl &, parser::AllCookedSources &); ~SemanticsContext(); const common::IntrinsicTypeDefaultKinds &defaultKinds() const { @@ -89,7 +89,7 @@ class SemanticsContext { Scope &globalScope() { return globalScope_; } parser::Messages &messages() { return messages_; } evaluate::FoldingContext &foldingContext() { return foldingContext_; } - parser::AllSources &allSources() { return allSources_; } + parser::AllCookedSources &allCookedSources() { return allCookedSources_; } SemanticsContext &set_location( const std::optional &location) { @@ -179,7 +179,7 @@ class SemanticsContext { const common::IntrinsicTypeDefaultKinds &defaultKinds_; const common::LanguageFeatureControl languageFeatures_; - parser::AllSources &allSources_; + parser::AllCookedSources &allCookedSources_; std::optional location_; std::vector searchDirectories_; std::string 
moduleDirectory_{"."s}; @@ -204,8 +204,8 @@ class SemanticsContext { class Semantics { public: explicit Semantics(SemanticsContext &context, parser::Program &program, - parser::CookedSource &cooked, bool debugModuleWriter = false) - : context_{context}, program_{program}, cooked_{cooked} { + const parser::CookedSource &cooked, bool debugModuleWriter = false) + : context_{context}, program_{program} { context.set_debugModuleWriter(debugModuleWriter); context.globalScope().AddSourceRange(parser::CharBlock{cooked.data()}); } @@ -223,7 +223,6 @@ class Semantics { private: SemanticsContext &context_; parser::Program &program_; - const parser::CookedSource &cooked_; }; // Base class for semantics checkers. diff --git a/flang/lib/Parser/debug-parser.cpp b/flang/lib/Parser/debug-parser.cpp index dbcc64f14bb1d..af5da091cde63 100644 --- a/flang/lib/Parser/debug-parser.cpp +++ b/flang/lib/Parser/debug-parser.cpp @@ -18,9 +18,9 @@ std::optional DebugParser::Parse(ParseState &state) const { std::string note{str_, length_}; Message message{state.GetLocation(), "parser debug: %s"_en_US, note}; message.SetContext(state.context().get()); - message.Emit(*out, ustate->cooked(), true); + message.Emit(*out, ustate->allCooked(), true); } } - return {Success{}}; + return Success{}; } } // namespace Fortran::parser diff --git a/flang/lib/Parser/instrumented-parser.cpp b/flang/lib/Parser/instrumented-parser.cpp index 765d292193958..6687aa1bbe542 100644 --- a/flang/lib/Parser/instrumented-parser.cpp +++ b/flang/lib/Parser/instrumented-parser.cpp @@ -63,14 +63,15 @@ void ParsingLog::Note(const char *at, const MessageFixedText &tag, bool pass, } } -void ParsingLog::Dump(llvm::raw_ostream &o, const CookedSource &cooked) const { +void ParsingLog::Dump( + llvm::raw_ostream &o, const AllCookedSources &allCooked) const { for (const auto &posLog : perPos_) { const char *at{reinterpret_cast(posLog.first)}; for (const auto &tagLog : posLog.second.perTag) { - Message{at, tagLog.first}.Emit(o, cooked, 
true); + Message{at, tagLog.first}.Emit(o, allCooked, true); auto &entry{tagLog.second}; o << " " << (entry.pass ? "pass" : "fail") << " " << entry.count << '\n'; - entry.messages.Emit(o, cooked); + entry.messages.Emit(o, allCooked); } } } diff --git a/flang/lib/Parser/message.cpp b/flang/lib/Parser/message.cpp index 87594d64a8c11..6819ee4d83b2f 100644 --- a/flang/lib/Parser/message.cpp +++ b/flang/lib/Parser/message.cpp @@ -165,43 +165,43 @@ std::string Message::ToString() const { text_); } -void Message::ResolveProvenances(const CookedSource &cooked) { +void Message::ResolveProvenances(const AllCookedSources &allCooked) { if (CharBlock * cb{std::get_if(&location_)}) { if (std::optional resolved{ - cooked.GetProvenanceRange(*cb)}) { + allCooked.GetProvenanceRange(*cb)}) { location_ = *resolved; } } if (Message * attachment{attachment_.get()}) { - attachment->ResolveProvenances(cooked); + attachment->ResolveProvenances(allCooked); } } std::optional Message::GetProvenanceRange( - const CookedSource &cooked) const { + const AllCookedSources &allCooked) const { return std::visit( common::visitors{ - [&](CharBlock cb) { return cooked.GetProvenanceRange(cb); }, + [&](CharBlock cb) { return allCooked.GetProvenanceRange(cb); }, [](const ProvenanceRange &pr) { return std::make_optional(pr); }, }, location_); } -void Message::Emit(llvm::raw_ostream &o, const CookedSource &cooked, +void Message::Emit(llvm::raw_ostream &o, const AllCookedSources &allCooked, bool echoSourceLine) const { - std::optional provenanceRange{GetProvenanceRange(cooked)}; + std::optional provenanceRange{GetProvenanceRange(allCooked)}; std::string text; if (IsFatal()) { text += "error: "; } text += ToString(); - const AllSources &sources{cooked.allSources()}; + const AllSources &sources{allCooked.allSources()}; sources.EmitMessage(o, provenanceRange, text, echoSourceLine); if (attachmentIsContext_) { for (const Message *context{attachment_.get()}; context; context = context->attachment_.get()) { 
std::optional contextProvenance{ - context->GetProvenanceRange(cooked)}; + context->GetProvenanceRange(allCooked)}; text = "in the context: "; text += context->ToString(); // TODO: don't echo the source lines of a context when it's the @@ -213,7 +213,7 @@ void Message::Emit(llvm::raw_ostream &o, const CookedSource &cooked, } else { for (const Message *attachment{attachment_.get()}; attachment; attachment = attachment->attachment_.get()) { - sources.EmitMessage(o, attachment->GetProvenanceRange(cooked), + sources.EmitMessage(o, attachment->GetProvenanceRange(allCooked), attachment->ToString(), echoSourceLine); } } @@ -300,13 +300,13 @@ void Messages::Copy(const Messages &that) { } } -void Messages::ResolveProvenances(const CookedSource &cooked) { +void Messages::ResolveProvenances(const AllCookedSources &allCooked) { for (Message &m : messages_) { - m.ResolveProvenances(cooked); + m.ResolveProvenances(allCooked); } } -void Messages::Emit(llvm::raw_ostream &o, const CookedSource &cooked, +void Messages::Emit(llvm::raw_ostream &o, const AllCookedSources &allCooked, bool echoSourceLines) const { std::vector sorted; for (const auto &msg : messages_) { @@ -315,7 +315,7 @@ void Messages::Emit(llvm::raw_ostream &o, const CookedSource &cooked, std::stable_sort(sorted.begin(), sorted.end(), [](const Message *x, const Message *y) { return x->SortBefore(*y); }); for (const Message *msg : sorted) { - msg->Emit(o, cooked, echoSourceLines); + msg->Emit(o, allCooked, echoSourceLines); } } diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp index b77242ae08769..819f3cf99867a 100644 --- a/flang/lib/Parser/parsing.cpp +++ b/flang/lib/Parser/parsing.cpp @@ -17,12 +17,12 @@ namespace Fortran::parser { -Parsing::Parsing(AllSources &s) : cooked_{s} {} +Parsing::Parsing(AllCookedSources &allCooked) : allCooked_{allCooked} {} Parsing::~Parsing() {} const SourceFile *Parsing::Prescan(const std::string &path, Options options) { options_ = options; - AllSources 
&allSources{cooked_.allSources()}; + AllSources &allSources{allCooked_.allSources()}; if (options.isModuleFile) { for (const auto &path : options.searchDirectories) { allSources.PushSearchPathDirectory(path); @@ -63,7 +63,9 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { preprocessor.Undefine(predef.first); } } - Prescanner prescanner{messages_, cooked_, preprocessor, options.features}; + currentCooked_ = &allCooked_.NewCookedSource(); + Prescanner prescanner{ + messages_, *currentCooked_, allSources, preprocessor, options.features}; prescanner.set_fixedForm(options.isFixedForm) .set_fixedFormColumnLimit(options.fixedFormColumns) .AddCompilerDirectiveSentinel("dir$"); @@ -77,21 +79,21 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) { ProvenanceRange range{allSources.AddIncludedFile( *sourceFile, ProvenanceRange{}, options.isModuleFile)}; prescanner.Prescan(range); - if (cooked_.BufferedBytes() == 0 && !options.isModuleFile) { + if (currentCooked_->BufferedBytes() == 0 && !options.isModuleFile) { // Input is empty. Append a newline so that any warning // message about nonstandard usage will have provenance. 
- cooked_.Put('\n', range.start()); + currentCooked_->Put('\n', range.start()); } - cooked_.Marshal(); + currentCooked_->Marshal(allSources); if (options.needProvenanceRangeToCharBlockMappings) { - cooked_.CompileProvenanceRangeToOffsetMappings(); + currentCooked_->CompileProvenanceRangeToOffsetMappings(allSources); } return sourceFile; } void Parsing::DumpCookedChars(llvm::raw_ostream &out) const { - UserState userState{cooked_, common::LanguageFeatureControl{}}; - ParseState parseState{cooked_}; + UserState userState{allCooked_, common::LanguageFeatureControl{}}; + ParseState parseState{cooked()}; parseState.set_inFixedForm(options_.isFixedForm).set_userState(&userState); while (std::optional p{parseState.GetNextChar()}) { out << **p; @@ -99,19 +101,19 @@ void Parsing::DumpCookedChars(llvm::raw_ostream &out) const { } void Parsing::DumpProvenance(llvm::raw_ostream &out) const { - cooked_.Dump(out); + allCooked_.Dump(out); } void Parsing::DumpParsingLog(llvm::raw_ostream &out) const { - log_.Dump(out, cooked_); + log_.Dump(out, allCooked_); } void Parsing::Parse(llvm::raw_ostream &out) { - UserState userState{cooked_, options_.features}; + UserState userState{allCooked_, options_.features}; userState.set_debugOutput(out) .set_instrumentedParse(options_.instrumentedParse) .set_log(&log_); - ParseState parseState{cooked_}; + ParseState parseState{cooked()}; parseState.set_inFixedForm(options_.isFixedForm).set_userState(&userState); parseTree_ = program.Parse(parseState); CHECK( diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index 5e6f13797646b..8e8e57c1334d9 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -26,14 +26,16 @@ using common::LanguageFeature; static constexpr int maxPrescannerNesting{100}; Prescanner::Prescanner(Messages &messages, CookedSource &cooked, - Preprocessor &preprocessor, common::LanguageFeatureControl lfc) - : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor}, - 
features_{lfc}, encoding_{cooked.allSources().encoding()} {} + AllSources &allSources, Preprocessor &preprocessor, + common::LanguageFeatureControl lfc) + : messages_{messages}, cooked_{cooked}, allSources_{allSources}, + preprocessor_{preprocessor}, features_{lfc}, + encoding_{allSources_.encoding()} {} Prescanner::Prescanner(const Prescanner &that) : messages_{that.messages_}, cooked_{that.cooked_}, - preprocessor_{that.preprocessor_}, features_{that.features_}, - inFixedForm_{that.inFixedForm_}, + allSources_{that.allSources_}, preprocessor_{that.preprocessor_}, + features_{that.features_}, inFixedForm_{that.inFixedForm_}, fixedFormColumnLimit_{that.fixedFormColumnLimit_}, encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ + 1}, @@ -59,10 +61,10 @@ static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { } void Prescanner::Prescan(ProvenanceRange range) { - AllSources &allSources{cooked_.allSources()}; startProvenance_ = range.start(); std::size_t offset{0}; - const SourceFile *source{allSources.GetSourceFile(startProvenance_, &offset)}; + const SourceFile *source{ + allSources_.GetSourceFile(startProvenance_, &offset)}; CHECK(source); start_ = source->content().data() + offset; limit_ = start_ + range.size(); @@ -84,7 +86,7 @@ void Prescanner::Prescan(ProvenanceRange range) { dir += "free"; } dir += '\n'; - TokenSequence tokens{dir, allSources.AddCompilerInsertion(dir).start()}; + TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()}; tokens.Emit(cooked_); } } @@ -761,14 +763,13 @@ void Prescanner::FortranInclude(const char *firstQuote) { std::string buf; llvm::raw_string_ostream error{buf}; Provenance provenance{GetProvenance(nextLine_)}; - AllSources &allSources{cooked_.allSources()}; - const SourceFile *currentFile{allSources.GetSourceFile(provenance)}; + const SourceFile *currentFile{allSources_.GetSourceFile(provenance)}; if (currentFile) { - 
allSources.PushSearchPathDirectory(DirectoryName(currentFile->path())); + allSources_.PushSearchPathDirectory(DirectoryName(currentFile->path())); } - const SourceFile *included{allSources.Open(path, error)}; + const SourceFile *included{allSources_.Open(path, error)}; if (currentFile) { - allSources.PopSearchPathDirectory(); + allSources_.PopSearchPathDirectory(); } if (!included) { Say(provenance, "INCLUDE: %s"_err_en_US, error.str()); @@ -776,7 +777,7 @@ void Prescanner::FortranInclude(const char *firstQuote) { ProvenanceRange includeLineRange{ provenance, static_cast(p - nextLine_)}; ProvenanceRange fileRange{ - allSources.AddIncludedFile(*included, includeLineRange)}; + allSources_.AddIncludedFile(*included, includeLineRange)}; Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange); } } diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h index 0b5b64792004a..ab56ed455040b 100644 --- a/flang/lib/Parser/prescan.h +++ b/flang/lib/Parser/prescan.h @@ -33,7 +33,7 @@ class Preprocessor; class Prescanner { public: - Prescanner(Messages &, CookedSource &, Preprocessor &, + Prescanner(Messages &, CookedSource &, AllSources &, Preprocessor &, common::LanguageFeatureControl); Prescanner(const Prescanner &); @@ -65,10 +65,7 @@ class Prescanner { Provenance GetCurrentProvenance() const { return GetProvenance(at_); } template Message &Say(A &&...a) { - Message &m{messages_.Say(std::forward(a)...)}; - std::optional range{m.GetProvenanceRange(cooked_)}; - CHECK(!range || cooked_.IsValid(*range)); - return m; + return messages_.Say(std::forward(a)...); } private: @@ -124,7 +121,7 @@ class Prescanner { } void EmitInsertedChar(TokenSequence &tokens, char ch) { - Provenance provenance{cooked_.allSources().CompilerInsertionProvenance(ch)}; + Provenance provenance{allSources_.CompilerInsertionProvenance(ch)}; tokens.PutNextTokenChar(ch, provenance); } @@ -184,6 +181,7 @@ class Prescanner { Messages &messages_; CookedSource &cooked_; + AllSources 
&allSources_; Preprocessor &preprocessor_; common::LanguageFeatureControl features_; bool inFixedForm_{false}; @@ -222,9 +220,9 @@ class Prescanner { bool skipLeadingAmpersand_{false}; const Provenance spaceProvenance_{ - cooked_.allSources().CompilerInsertionProvenance(' ')}; + allSources_.CompilerInsertionProvenance(' ')}; const Provenance backslashProvenance_{ - cooked_.allSources().CompilerInsertionProvenance('\\')}; + allSources_.CompilerInsertionProvenance('\\')}; // To avoid probing the set of active compiler directive sentinel strings // on every comment line, they're checked first with a cheap Bloom filter. diff --git a/flang/lib/Parser/provenance.cpp b/flang/lib/Parser/provenance.cpp index 73e0f7154b6b1..bcb871bd7cb41 100644 --- a/flang/lib/Parser/provenance.cpp +++ b/flang/lib/Parser/provenance.cpp @@ -400,12 +400,9 @@ const AllSources::Origin &AllSources::MapToOrigin(Provenance at) const { return origin_[low]; } -CookedSource::CookedSource(AllSources &s) : allSources_{s} {} -CookedSource::~CookedSource() {} - std::optional CookedSource::GetProvenanceRange( CharBlock cookedRange) const { - if (!IsValid(cookedRange)) { + if (!Contains(cookedRange)) { return std::nullopt; } ProvenanceRange first{provenanceMap_.Map(cookedRange.begin() - &data_[0])}; @@ -416,34 +413,6 @@ std::optional CookedSource::GetProvenanceRange( return {ProvenanceRange{first.start(), last.start() - first.start()}}; } -std::optional CookedSource::GetCharBlockFromLineAndColumns( - int line, int startColumn, int endColumn) const { - // 2nd column is exclusive, meaning it is target column + 1. 
- CHECK(line > 0 && startColumn > 0 && endColumn > 0); - CHECK(startColumn < endColumn); - auto provenanceStart{allSources_.GetFirstFileProvenance().value().start()}; - if (auto sourceFile{allSources_.GetSourceFile(provenanceStart)}) { - CHECK(line <= static_cast(sourceFile->lines())); - return GetCharBlock(ProvenanceRange(sourceFile->GetLineStartOffset(line) + - provenanceStart.offset() + startColumn - 1, - endColumn - startColumn)); - } - return std::nullopt; -} - -std::optional> -CookedSource::GetSourcePositionRange(CharBlock cookedRange) const { - if (auto range{GetProvenanceRange(cookedRange)}) { - if (auto firstOffset{allSources_.GetSourcePosition(range->start())}) { - if (auto secondOffset{ - allSources_.GetSourcePosition(range->start() + range->size())}) { - return std::pair{*firstOffset, *secondOffset}; - } - } - } - return std::nullopt; -} - std::optional CookedSource::GetCharBlock( ProvenanceRange range) const { CHECK(!invertedMap_.empty() && @@ -457,16 +426,17 @@ std::optional CookedSource::GetCharBlock( std::size_t CookedSource::BufferedBytes() const { return buffer_.bytes(); } -void CookedSource::Marshal() { +void CookedSource::Marshal(AllSources &allSources) { CHECK(provenanceMap_.SizeInBytes() == buffer_.bytes()); - provenanceMap_.Put(allSources_.AddCompilerInsertion("(after end of source)")); + provenanceMap_.Put(allSources.AddCompilerInsertion("(after end of source)")); data_ = buffer_.Marshal(); buffer_.clear(); } -void CookedSource::CompileProvenanceRangeToOffsetMappings() { +void CookedSource::CompileProvenanceRangeToOffsetMappings( + AllSources &allSources) { if (invertedMap_.empty()) { - invertedMap_ = provenanceMap_.Invert(allSources_); + invertedMap_ = provenanceMap_.Invert(allSources); } } @@ -534,12 +504,73 @@ llvm::raw_ostream &AllSources::Dump(llvm::raw_ostream &o) const { } llvm::raw_ostream &CookedSource::Dump(llvm::raw_ostream &o) const { - o << "CookedSource:\n"; - allSources_.Dump(o); o << "CookedSource::provenanceMap_:\n"; 
provenanceMap_.Dump(o); o << "CookedSource::invertedMap_:\n"; invertedMap_.Dump(o); return o; } + +AllCookedSources::AllCookedSources(AllSources &s) : allSources_{s} {} +AllCookedSources::~AllCookedSources() {} + +CookedSource &AllCookedSources::NewCookedSource() { + return cooked_.emplace_back(); +} + +std::optional AllCookedSources::GetProvenanceRange( + CharBlock cb) const { + if (const CookedSource * c{Find(cb)}) { + return c->GetProvenanceRange(cb); + } else { + return std::nullopt; + } +} + +std::optional AllCookedSources::GetCharBlockFromLineAndColumns( + int line, int startColumn, int endColumn) const { + // 2nd column is exclusive, meaning it is target column + 1. + CHECK(line > 0 && startColumn > 0 && endColumn > 0); + CHECK(startColumn < endColumn); + auto provenanceStart{allSources_.GetFirstFileProvenance().value().start()}; + if (auto sourceFile{allSources_.GetSourceFile(provenanceStart)}) { + CHECK(line <= static_cast(sourceFile->lines())); + return GetCharBlock(ProvenanceRange(sourceFile->GetLineStartOffset(line) + + provenanceStart.offset() + startColumn - 1, + endColumn - startColumn)); + } + return std::nullopt; +} + +std::optional> +AllCookedSources::GetSourcePositionRange(CharBlock cookedRange) const { + if (auto range{GetProvenanceRange(cookedRange)}) { + if (auto firstOffset{allSources_.GetSourcePosition(range->start())}) { + if (auto secondOffset{ + allSources_.GetSourcePosition(range->start() + range->size())}) { + return std::pair{*firstOffset, *secondOffset}; + } + } + } + return std::nullopt; +} + +std::optional AllCookedSources::GetCharBlock( + ProvenanceRange range) const { + for (const auto &c : cooked_) { + if (auto result{c.GetCharBlock(range)}) { + return result; + } + } + return nullptr; +} + +void AllCookedSources::Dump(llvm::raw_ostream &o) const { + o << "AllSources:\n"; + allSources_.Dump(o); + for (const auto &c : cooked_) { + c.Dump(o); + } +} + } // namespace Fortran::parser diff --git a/flang/lib/Semantics/mod-file.cpp 
b/flang/lib/Semantics/mod-file.cpp index 6fa59f0a82a08..ef62a94b1b89e 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -751,7 +751,7 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) { return it->second->scope(); } } - parser::Parsing parsing{context_.allSources()}; + parser::Parsing parsing{context_.allCookedSources()}; parser::Options options; options.isModuleFile = true; options.features.Enable(common::LanguageFeature::BackslashEscapes); @@ -796,7 +796,6 @@ Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) { } auto &modSymbol{*it->second}; modSymbol.set(Symbol::Flag::ModFile); - modSymbol.scope()->set_chars(parsing.cooked()); return modSymbol.scope(); } diff --git a/flang/lib/Semantics/scope.cpp b/flang/lib/Semantics/scope.cpp index a2a9e1dbe9e73..c7635c0b1a3bb 100644 --- a/flang/lib/Semantics/scope.cpp +++ b/flang/lib/Semantics/scope.cpp @@ -217,14 +217,6 @@ DeclTypeSpec &Scope::MakeDerivedType( return declTypeSpecs_.emplace_back(category, std::move(spec)); } -void Scope::set_chars(parser::CookedSource &cooked) { - CHECK(kind_ == Kind::Module); - CHECK(parent_.IsGlobal() || parent_.IsModuleFile()); - CHECK(DEREF(symbol_).test(Symbol::Flag::ModFile)); - // TODO: Preserve the CookedSource rather than acquiring its string. 
- chars_ = cooked.AcquireData(); -} - Scope::ImportKind Scope::GetImportKind() const { if (importKind_) { return *importKind_; diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index af5b120d9393a..b5b7802c22a1a 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -181,9 +181,9 @@ static bool PerformStatementSemantics( SemanticsContext::SemanticsContext( const common::IntrinsicTypeDefaultKinds &defaultKinds, const common::LanguageFeatureControl &languageFeatures, - parser::AllSources &allSources) + parser::AllCookedSources &allCookedSources) : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures}, - allSources_{allSources}, + allCookedSources_{allCookedSources}, intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)}, foldingContext_{ parser::ContextualMessages{&messages_}, defaultKinds_, intrinsics_} {} @@ -351,7 +351,7 @@ bool Semantics::Perform() { } void Semantics::EmitMessages(llvm::raw_ostream &os) const { - context_.messages().Emit(os, cooked_); + context_.messages().Emit(os, context_.allCookedSources()); } void Semantics::DumpSymbols(llvm::raw_ostream &os) { @@ -361,9 +361,10 @@ void Semantics::DumpSymbols(llvm::raw_ostream &os) { void Semantics::DumpSymbolsSources(llvm::raw_ostream &os) const { NameToSymbolMap symbols; GetSymbolNames(context_.globalScope(), symbols); + const parser::AllCookedSources &allCooked{context_.allCookedSources()}; for (const auto &pair : symbols) { const Symbol &symbol{pair.second}; - if (auto sourceInfo{cooked_.GetSourcePositionRange(symbol.name())}) { + if (auto sourceInfo{allCooked.GetSourcePositionRange(symbol.name())}) { os << symbol.name().ToString() << ": " << sourceInfo->first.file.path() << ", " << sourceInfo->first.line << ", " << sourceInfo->first.column << "-" << sourceInfo->second.column << "\n"; diff --git a/flang/test/Semantics/getsymbols02.f90 b/flang/test/Semantics/getsymbols02.f90 index 1eed3e922e826..80b7651f029b2 100644 
--- a/flang/test/Semantics/getsymbols02.f90 +++ b/flang/test/Semantics/getsymbols02.f90 @@ -10,5 +10,5 @@ PROGRAM helloworld ! RUN: %f18 -fparse-only %S/Inputs/getsymbols02-a.f90 ! RUN: %f18 -fparse-only %S/Inputs/getsymbols02-b.f90 ! RUN: %f18 -fget-symbols-sources -fparse-only %s 2>&1 | FileCheck %s -! CHECK: callget5: mm2b -! CHECK: get5: mm2a +! CHECK: callget5: ./mm2b.mod, +! CHECK: get5: ./mm2a.mod, diff --git a/flang/tools/f18-parse-demo/f18-parse-demo.cpp b/flang/tools/f18-parse-demo/f18-parse-demo.cpp index 60303aa7a24ff..4ccc65e0631d9 100644 --- a/flang/tools/f18-parse-demo/f18-parse-demo.cpp +++ b/flang/tools/f18-parse-demo/f18-parse-demo.cpp @@ -160,14 +160,15 @@ std::string CompileFortran( } options.searchDirectories = driver.searchDirectories; Fortran::parser::AllSources allSources; - Fortran::parser::Parsing parsing{allSources}; + Fortran::parser::AllCookedSources allCookedSources{allSources}; + Fortran::parser::Parsing parsing{allCookedSources}; auto start{CPUseconds()}; parsing.Prescan(path, options); if (!parsing.messages().empty() && (driver.warningsAreErrors || parsing.messages().AnyFatalError())) { llvm::errs() << driver.prefix << "could not scan " << path << '\n'; - parsing.messages().Emit(llvm::errs(), parsing.cooked()); + parsing.messages().Emit(llvm::errs(), parsing.allCooked()); exitStatus = EXIT_FAILURE; return {}; } @@ -191,7 +192,7 @@ std::string CompileFortran( } parsing.ClearLog(); - parsing.messages().Emit(llvm::errs(), parsing.cooked()); + parsing.messages().Emit(llvm::errs(), parsing.allCooked()); if (!parsing.consumedWholeFile()) { parsing.EmitMessage(llvm::errs(), parsing.finalRestingPlace(), "parser FAIL (final position)"); diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp index 156c2337d0c8f..a33a167686e49 100644 --- a/flang/tools/f18/f18.cpp +++ b/flang/tools/f18/f18.cpp @@ -188,9 +188,10 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, DriverOptions &driver, const 
Fortran::common::IntrinsicTypeDefaultKinds &defaultKinds) { Fortran::parser::AllSources allSources; + Fortran::parser::AllCookedSources allCookedSources{allSources}; allSources.set_encoding(driver.encoding); Fortran::semantics::SemanticsContext semanticsContext{ - defaultKinds, options.features, allSources}; + defaultKinds, options.features, allCookedSources}; semanticsContext.set_moduleDirectory(driver.moduleDirectory) .set_moduleFileSuffix(driver.moduleFileSuffix) .set_searchDirectories(driver.searchDirectories) @@ -204,12 +205,12 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, } } options.searchDirectories = driver.searchDirectories; - Fortran::parser::Parsing parsing{semanticsContext.allSources()}; + Fortran::parser::Parsing parsing{allCookedSources}; parsing.Prescan(path, options); if (!parsing.messages().empty() && (driver.warningsAreErrors || parsing.messages().AnyFatalError())) { llvm::errs() << driver.prefix << "could not scan " << path << '\n'; - parsing.messages().Emit(llvm::errs(), parsing.cooked()); + parsing.messages().Emit(llvm::errs(), allCookedSources); exitStatus = EXIT_FAILURE; return {}; } @@ -218,7 +219,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, return {}; } if (driver.dumpCookedChars) { - parsing.messages().Emit(llvm::errs(), parsing.cooked()); + parsing.messages().Emit(llvm::errs(), allCookedSources); parsing.DumpCookedChars(llvm::outs()); return {}; } @@ -228,7 +229,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, return {}; } parsing.ClearLog(); - parsing.messages().Emit(llvm::errs(), parsing.cooked()); + parsing.messages().Emit(llvm::errs(), allCookedSources); if (!parsing.consumedWholeFile()) { parsing.EmitMessage(llvm::errs(), parsing.finalRestingPlace(), "parser FAIL (final position)"); @@ -274,7 +275,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, return {}; } if (driver.getDefinition) { - if 
(auto cb{parsing.cooked().GetCharBlockFromLineAndColumns( + if (auto cb{allCookedSources.GetCharBlockFromLineAndColumns( driver.getDefinitionArgs.line, driver.getDefinitionArgs.startColumn, driver.getDefinitionArgs.endColumn)}) { @@ -283,7 +284,7 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, llvm::errs() << "Found symbol name: " << symbol->name().ToString() << "\n"; if (auto sourceInfo{ - parsing.cooked().GetSourcePositionRange(symbol->name())}) { + allCookedSources.GetSourcePositionRange(symbol->name())}) { llvm::outs() << symbol->name().ToString() << ": " << sourceInfo->first.file.path() << ", " << sourceInfo->first.line << ", " diff --git a/flang/unittests/Evaluate/intrinsics.cpp b/flang/unittests/Evaluate/intrinsics.cpp index 3b9805946286d..4f2a21dfe6048 100644 --- a/flang/unittests/Evaluate/intrinsics.cpp +++ b/flang/unittests/Evaluate/intrinsics.cpp @@ -22,9 +22,9 @@ class CookedStrings { } void Save(const std::string &s) { offsets_[s] = cooked_.Put(s); - cooked_.PutProvenance(cooked_.allSources().AddCompilerInsertion(s)); + cooked_.PutProvenance(allSources_.AddCompilerInsertion(s)); } - void Marshal() { cooked_.Marshal(); } + void Marshal() { cooked_.Marshal(allSources_); } parser::CharBlock operator()(const std::string &s) { return {cooked_.data().data() + offsets_[s], s.size()}; } @@ -32,12 +32,13 @@ class CookedStrings { return parser::ContextualMessages{cooked_.data(), &buffer}; } void Emit(llvm::raw_ostream &o, const parser::Messages &messages) { - messages.Emit(o, cooked_); + messages.Emit(o, allCookedSources_); } private: parser::AllSources allSources_; - parser::CookedSource cooked_{allSources_}; + parser::AllCookedSources allCookedSources_{allSources_}; + parser::CookedSource &cooked_{allCookedSources_.NewCookedSource()}; std::map offsets_; }; From d70e05c9e36ada3ea6341764a3bc34de7de7d8dd Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Wed, 2 Sep 2020 17:44:00 +0100 Subject: [PATCH 024/465] [clang-format] 
Parse double-square attributes as pointer qualifiers Before: x = (foo *[[clang::attr]]) * v; After: x = (foo *[[clang::attr]])*v; Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D86721 --- clang/lib/Format/TokenAnnotator.cpp | 6 ++++++ clang/unittests/Format/FormatTest.cpp | 9 +++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index a9077500e041f..f04f101f04593 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1840,6 +1840,12 @@ class AnnotatingParser { T = T->MatchingParen->Previous->Previous; continue; } + } else if (T->is(TT_AttributeSquare)) { + // Handle `x = (foo *[[clang::foo]])&v;`: + if (T->MatchingParen && T->MatchingParen->Previous) { + T = T->MatchingParen->Previous; + continue; + } } else if (T->canBePointerOrReferenceQualifier()) { T = T->Previous; continue; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index f2978cdbed8d6..14c97784b7385 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -8068,6 +8068,8 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyIndependentOfContext("MACRO(A *_Null_unspecified a);"); verifyIndependentOfContext("MACRO(A *__attribute__((foo)) a);"); verifyIndependentOfContext("MACRO(A *__attribute((foo)) a);"); + verifyIndependentOfContext("MACRO(A *[[clang::attr]] a);"); + verifyIndependentOfContext("MACRO(A *[[clang::attr(\"foo\")]] a);"); verifyIndependentOfContext("MACRO('0' <= c && c <= '9');"); verifyFormat("void f() { f(float{1}, a * a); }"); // FIXME: Is there a way to make this work? 
@@ -8137,14 +8139,17 @@ TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { verifyFormat("x = (foo *_Nullable)*v;"); verifyFormat("x = (foo *_Null_unspecified)*v;"); verifyFormat("x = (foo *_Nonnull)*v;"); + verifyFormat("x = (foo *[[clang::attr]])*v;"); + verifyFormat("x = (foo *[[clang::attr(\"foo\")]])*v;"); // Check that we handle multiple trailing qualifiers and skip them all to // determine that the expression is a cast to a pointer type. FormatStyle LongPointerRight = getLLVMStyleWithColumns(999); FormatStyle LongPointerLeft = getLLVMStyleWithColumns(999); LongPointerLeft.PointerAlignment = FormatStyle::PAS_Left; - StringRef AllQualifiers = "const volatile restrict __attribute__((foo)) " - "_Nonnull _Null_unspecified _Nonnull"; + StringRef AllQualifiers = + "const volatile restrict __attribute__((foo)) _Nonnull _Null_unspecified " + "_Nonnull [[clang::attr]]"; verifyFormat(("x = (foo *" + AllQualifiers + ")*v;").str(), LongPointerRight); verifyFormat(("x = (foo* " + AllQualifiers + ")*v;").str(), LongPointerLeft); From 352cf57cfb6ad33a95ff2d80109e1e88aa39b77e Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 2 Sep 2020 09:22:50 -0700 Subject: [PATCH 025/465] [Bindings] Move LLVMAddInstructionSimplifyPass to Scalar.cpp Should not be with the pass, but alongside all the other C bindings. 
Reviewed By: sroland Differential Revision: https://reviews.llvm.org/D87041 --- llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp | 5 ----- llvm/lib/Transforms/Scalar/Scalar.cpp | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp index 76e3f7859f08a..c11d2e4c1d6b9 100644 --- a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp +++ b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp @@ -17,7 +17,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -131,10 +130,6 @@ FunctionPass *llvm::createInstSimplifyLegacyPass() { return new InstSimplifyLegacyPass(); } -void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createInstSimplifyLegacyPass()); -} - PreservedAnalyses InstSimplifyPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 55b9dd7482cc3..f4dc6f2996b98 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -166,6 +166,10 @@ void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createIndVarSimplifyPass()); } +void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createInstSimplifyLegacyPass()); +} + void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createJumpThreadingPass()); } From 8d2d0e84857cb1f2d01456eb433b5172d3a0772b Mon Sep 17 00:00:00 2001 From: Douglas Yung Date: Wed, 2 Sep 2020 10:35:42 -0700 Subject: [PATCH 026/465] Revert "Move all fields of '-cc1' option related classes into def file databases" This reverts commit c4a2a1307484cffe94a291c42572775411bac8d8. 
This commit was causing a test failure: http://lab.llvm.org:8011/builders/llvm-clang-win-x-armv7l/builds/1068 --- clang/include/clang/Basic/CodeGenOptions.def | 233 +----------------- clang/include/clang/Basic/CodeGenOptions.h | 212 +++++++++++++++- clang/include/clang/Basic/CommentOptions.def | 26 -- clang/include/clang/Basic/CommentOptions.h | 10 +- .../include/clang/Basic/DiagnosticOptions.def | 27 -- clang/include/clang/Basic/DiagnosticOptions.h | 26 +- .../include/clang/Basic/FileSystemOptions.def | 21 -- clang/include/clang/Basic/FileSystemOptions.h | 5 +- clang/include/clang/Basic/LangOptions.def | 72 ------ clang/include/clang/Basic/LangOptions.h | 69 +++++- clang/include/clang/Basic/TargetOptions.def | 88 ------- clang/include/clang/Basic/TargetOptions.h | 66 ++++- .../clang/Frontend/CompilerInvocation.h | 14 +- .../Frontend/DependencyOutputOptions.def | 50 ---- .../clang/Frontend/DependencyOutputOptions.h | 46 +++- .../clang/Frontend/FrontendOptions.def | 179 -------------- .../include/clang/Frontend/FrontendOptions.h | 191 ++++++++++++-- .../clang/Frontend/MigratorOptions.def | 27 -- .../include/clang/Frontend/MigratorOptions.h | 11 +- .../Frontend/PreprocessorOutputOptions.def | 46 ---- .../Frontend/PreprocessorOutputOptions.h | 12 +- .../include/clang/Lex/HeaderSearchOptions.def | 136 ---------- clang/include/clang/Lex/HeaderSearchOptions.h | 127 +++++++++- .../include/clang/Lex/PreprocessorOptions.def | 166 ------------- clang/include/clang/Lex/PreprocessorOptions.h | 157 ++++++++++-- .../clang/Sema/CodeCompleteOptions.def | 51 ---- .../include/clang/Sema/CodeCompleteOptions.h | 35 ++- .../StaticAnalyzer/Core/AnalyzerOptions.def | 94 +------ .../StaticAnalyzer/Core/AnalyzerOptions.h | 94 ++++++- clang/lib/Basic/CodeGenOptions.cpp | 5 +- clang/lib/Basic/LangOptions.cpp | 3 +- 31 files changed, 975 insertions(+), 1324 deletions(-) delete mode 100644 clang/include/clang/Basic/CommentOptions.def delete mode 100644 
clang/include/clang/Basic/FileSystemOptions.def delete mode 100644 clang/include/clang/Basic/TargetOptions.def delete mode 100644 clang/include/clang/Frontend/DependencyOutputOptions.def delete mode 100644 clang/include/clang/Frontend/FrontendOptions.def delete mode 100644 clang/include/clang/Frontend/MigratorOptions.def delete mode 100644 clang/include/clang/Frontend/PreprocessorOutputOptions.def delete mode 100644 clang/include/clang/Lex/HeaderSearchOptions.def delete mode 100644 clang/include/clang/Lex/PreprocessorOptions.def delete mode 100644 clang/include/clang/Sema/CodeCompleteOptions.def diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 758dfbc1d283d..8b89aac8d6d5f 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -14,20 +14,17 @@ // //===----------------------------------------------------------------------===// #ifndef CODEGENOPT -#error Define the CODEGENOPT macro to handle language options +# error Define the CODEGENOPT macro to handle language options #endif #ifndef VALUE_CODEGENOPT -#define VALUE_CODEGENOPT(Name, Bits, Default) CODEGENOPT(Name, Bits, Default) +# define VALUE_CODEGENOPT(Name, Bits, Default) \ +CODEGENOPT(Name, Bits, Default) #endif #ifndef ENUM_CODEGENOPT -#define ENUM_CODEGENOPT(Name, Type, Bits, Default) \ - CODEGENOPT(Name, Bits, Default) -#endif - -#ifndef TYPED_CODEGENOPT -#define TYPED_CODEGENOPT(Type, Name, Description) +# define ENUM_CODEGENOPT(Name, Type, Bits, Default) \ +CODEGENOPT(Name, Bits, Default) #endif CODEGENOPT(DisableIntegratedAS, 1, 0) ///< -no-integrated-as @@ -398,226 +395,6 @@ CODEGENOPT(KeepStaticConsts, 1, 0) /// Whether to not follow the AAPCS that enforce at least one read before storing to a volatile bitfield CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0) -TYPED_CODEGENOPT( - std::string, BBSections, - "This field stores one of the allowed values for the option " - "-fbasic-block-sections=. 
The allowed values with this option are: " - "{\"labels\", \"all\", \"list=\", \"none\"}. \"labels\": Only " - "generate basic block symbols (labels) for all basic blocks, do not " - "generate unique sections for basic blocks. Use the machine basic block id " - "in the symbol name to associate profile info from virtual address to " - "machine basic block. \"all\" : Generate basic block sections for " - "all basic blocks. \"list=\": Generate basic block sections for a " - "subset of basic blocks. The functions and the machine basic block ids are " - "specified in the file. \"none\": Disable sections/labels for basic " - "blocks.") - -TYPED_CODEGENOPT(std::string, CodeModel, "The code model to use (-mcmodel).") - -TYPED_CODEGENOPT(std::string, CoverageDataFile, - "The filename with path we use for coverage data files. The " - "runtime allows further manipulation with the GCOV_PREFIX and " - "GCOV_PREFIX_STRIP environment variables. The filename with " - "path we use for coverage notes files.") -TYPED_CODEGENOPT(std::string, CoverageNotesFile, "") - -TYPED_CODEGENOPT( - std::string, ProfileFilterFiles, - "Regexes separated by a semi-colon to filter the files to instrument.") - -TYPED_CODEGENOPT( - std::string, ProfileExcludeFiles, - "Regexes separated by a semi-colon to filter the files to not instrument.") - -TYPED_CODEGENOPT(CoverageVersionTy, CoverageVersion, - "The version string to put into coverage files.") - -TYPED_CODEGENOPT(std::string, DebugPass, - "Enable additional debugging information.") - -TYPED_CODEGENOPT(std::string, DebugCompilationDir, - "The string to embed in debug information as the current " - "working directory.") - -TYPED_CODEGENOPT(std::string, DwarfDebugFlags, - "The string to embed in the debug information for the compile " - "unit, if non-empty.") - -TYPED_CODEGENOPT(std::string, RecordCommandLine, - "The string containing the commandline for the " - "llvm.commandline metadata, if non-empty.") - -TYPED_CODEGENOPT(DebugPrefixMapTy, 
DebugPrefixMap, "") - -TYPED_CODEGENOPT(std::string, FloatABI, - "The ABI to use for passing floating point arguments.") - -TYPED_CODEGENOPT(llvm::DenormalMode, FPDenormalMode, - "The floating-point denormal mode to use.") - -TYPED_CODEGENOPT(llvm::DenormalMode, FP32DenormalMode, - "The floating-point denormal mode to use, for float.") - -TYPED_CODEGENOPT(std::string, LimitFloatPrecision, - "The float precision limit to use, if non-empty.") - -TYPED_CODEGENOPT(std::vector, LinkBitcodeFiles, - "The files specified here are linked in to the module before " - "optimizations.") - -TYPED_CODEGENOPT( - std::string, MainFileName, - "The user provided name for the \"main file\", if non-empty. This is " - "useful in situations where the input file name does not match the " - "original input file, for example with -save-temps.") - -TYPED_CODEGENOPT(std::string, SplitDwarfFile, - "The name for the split debug info file used for the " - "DW_AT_[GNU_]dwo_name attribute in the skeleton CU.") - -TYPED_CODEGENOPT( - std::string, SplitDwarfOutput, - "Output filename for the split debug info, not used in the skeleton CU.") - -TYPED_CODEGENOPT(llvm::Reloc::Model, RelocationModel, - "The name of the relocation model to use.") - -TYPED_CODEGENOPT(std::string, ThreadModel, "The thread model to use") - -TYPED_CODEGENOPT(std::string, TrapFuncName, - "If not an empty string, trap intrinsics are lowered to calls " - "to this function instead of to trap instructions.") - -TYPED_CODEGENOPT(std::vector, DependentLibraries, - "A list of dependent libraries.") - -TYPED_CODEGENOPT(std::vector, LinkerOptions, - "A list of linker options to embed in the object file.") - -TYPED_CODEGENOPT( - std::string, InstrProfileOutput, - "Name of the profile file to use as output for -fprofile-instr-generate, " - "-fprofile-generate, and -fcs-profile-generate.") - -TYPED_CODEGENOPT(std::string, SampleProfileFile, - "Name of the profile file to use with -fprofile-sample-use.") - -TYPED_CODEGENOPT( - std::string, 
ProfileInstrumentUsePath, - "Name of the profile file to use as input for -fprofile-instr-use") - -TYPED_CODEGENOPT( - std::string, ProfileRemappingFile, - "Name of the profile remapping file to apply to the profile data supplied " - "by -fprofile-sample-use or -fprofile-instr-use.") - -TYPED_CODEGENOPT(std::string, ThinLTOIndexFile, - "Name of the function summary index file to use for ThinLTO " - "function importing.") - -TYPED_CODEGENOPT( - std::string, ThinLinkBitcodeFile, - "Name of a file that can optionally be written with minimized bitcode to " - "be used as input for the ThinLTO thin link step, which only needs the " - "summary and module symbol table (and not, e.g. any debug metadata).") - -TYPED_CODEGENOPT(std::string, SaveTempsFilePrefix, - "Prefix to use for -save-temps output.") - -TYPED_CODEGENOPT( - std::string, CudaGpuBinaryFileName, - "Name of file passed with -fcuda-include-gpubinary option to forward to " - "CUDA runtime back-end for incorporating them into host-side object file.") - -TYPED_CODEGENOPT(std::string, OptRecordFile, - "The name of the file to which the backend should save YAML " - "optimization records.") - -TYPED_CODEGENOPT(std::string, OptRecordPasses, - "The regex that filters the passes that should be saved to " - "the optimization records.") - -TYPED_CODEGENOPT(std::string, OptRecordFormat, - "The format used for serializing remarks (default: YAML)") - -TYPED_CODEGENOPT( - std::string, SymbolPartition, - "The name of the partition that symbols are assigned to, specified with " - "-fsymbol-partition (see https://lld.llvm.org/Partitions.html).") - -TYPED_CODEGENOPT( - std::shared_ptr, OptimizationRemarkPattern, - "Regular expression to select optimizations for which we should enable " - "optimization remarks. Transformation passes whose name matches this " - "expression (and support this feature), will emit a diagnostic whenever " - "they perform a transformation. 
This is enabled by the -Rpass=regexp flag.") - -TYPED_CODEGENOPT( - std::shared_ptr, OptimizationRemarkMissedPattern, - "Regular expression to select optimizations for which we should enable " - "missed optimization remarks. Transformation passes whose name matches " - "this expression (and support this feature), will emit a diagnostic " - "whenever they tried but failed to perform a transformation. This is " - "enabled by the -Rpass-missed=regexp flag.") - -TYPED_CODEGENOPT( - std::shared_ptr, OptimizationRemarkAnalysisPattern, - "Regular expression to select optimizations for which we should enable " - "optimization analyses. Transformation passes whose name matches this " - "expression (and support this feature), will emit a diagnostic whenever " - "they want to explain why they decided to apply or not apply a given " - "transformation. This is enabled by the -Rpass-analysis=regexp flag.") - -TYPED_CODEGENOPT(std::vector, RewriteMapFiles, - "Set of files defining the rules for the symbol rewriting.") - -TYPED_CODEGENOPT(SanitizerSet, SanitizeRecover, - "Set of sanitizer checks that are non-fatal (i.e. execution " - "should be continued when possible).") - -TYPED_CODEGENOPT(SanitizerSet, SanitizeTrap, - "Set of sanitizer checks that trap rather than diagnose.") - -TYPED_CODEGENOPT(std::vector, CmdArgs, - "List of backend command-line options for -fembed-bitcode.") - -TYPED_CODEGENOPT(std::vector, NoBuiltinFuncs, - "A list of all -fno-builtin-* function names (e.g., memset).") - -TYPED_CODEGENOPT(std::vector, Reciprocals, "") - -TYPED_CODEGENOPT(std::string, PreferVectorWidth, - "The preferred width for auto-vectorization transforms. 
This " - "is intended to override default transforms based on the " - "width of the architected vector registers.") - -TYPED_CODEGENOPT(XRayInstrSet, XRayInstrumentationBundle, - "Set of XRay instrumentation kinds to emit.") - -TYPED_CODEGENOPT(std::vector, DefaultFunctionAttrs, "") - -TYPED_CODEGENOPT( - std::vector, PassPlugins, - "List of dynamic shared object files to be loaded as pass plugins.") - -TYPED_CODEGENOPT( - std::vector, SanitizeCoverageAllowlistFiles, - "Path to allowlist file specifying which objects (files, functions) should " - "exclusively be instrumented by sanitizer coverage pass.") - -TYPED_CODEGENOPT(std::vector, SanitizeCoverageBlocklistFiles, - "Path to blocklist file specifying which objects (files, " - "functions) listed for instrumentation by sanitizer coverage " - "pass should actually not be instrumented.") - -TYPED_CODEGENOPT( - const char *, Argv0, - "Executable and command-line used to create a given CompilerInvocation. " - "Most of the time this will be the full -cc1 command.") - -TYPED_CODEGENOPT(ArrayRef, CommandLineArgs, "") - #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT -#undef TYPED_CODEGENOPT diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 093f4014ae8c3..ca391bf8f1861 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -110,15 +110,75 @@ class CodeGenOptions : public CodeGenOptionsBase { Embed_Marker // Embed a marker as a placeholder for bitcode. }; + // This field stores one of the allowed values for the option + // -fbasic-block-sections=. The allowed values with this option are: + // {"labels", "all", "list=", "none"}. + // + // "labels": Only generate basic block symbols (labels) for all basic + // blocks, do not generate unique sections for basic blocks. 
+ // Use the machine basic block id in the symbol name to + // associate profile info from virtual address to machine + // basic block. + // "all" : Generate basic block sections for all basic blocks. + // "list=": Generate basic block sections for a subset of basic blocks. + // The functions and the machine basic block ids are specified + // in the file. + // "none": Disable sections/labels for basic blocks. + std::string BBSections; + enum class FramePointerKind { None, // Omit all frame pointers. NonLeaf, // Keep non-leaf frame pointers. All, // Keep all frame pointers. }; - using DebugPrefixMapTy = std::map; + /// The code model to use (-mcmodel). + std::string CodeModel; + + /// The filename with path we use for coverage data files. The runtime + /// allows further manipulation with the GCOV_PREFIX and GCOV_PREFIX_STRIP + /// environment variables. + std::string CoverageDataFile; + + /// The filename with path we use for coverage notes files. + std::string CoverageNotesFile; + + /// Regexes separated by a semi-colon to filter the files to instrument. + std::string ProfileFilterFiles; + + /// Regexes separated by a semi-colon to filter the files to not instrument. + std::string ProfileExcludeFiles; + + /// The version string to put into coverage files. + char CoverageVersion[4]; + + /// Enable additional debugging information. + std::string DebugPass; + + /// The string to embed in debug information as the current working directory. + std::string DebugCompilationDir; + + /// The string to embed in the debug information for the compile unit, if + /// non-empty. + std::string DwarfDebugFlags; + + /// The string containing the commandline for the llvm.commandline metadata, + /// if non-empty. + std::string RecordCommandLine; + + std::map DebugPrefixMap; + + /// The ABI to use for passing floating point arguments. + std::string FloatABI; + + /// The floating-point denormal mode to use. 
+ llvm::DenormalMode FPDenormalMode = llvm::DenormalMode::getIEEE(); - using CoverageVersionTy = char[4]; + /// The floating-point denormal mode to use, for float. + llvm::DenormalMode FP32DenormalMode = llvm::DenormalMode::getIEEE(); + + /// The float precision limit to use, if non-empty. + std::string LimitFloatPrecision; struct BitcodeFileToLink { /// The filename of the bitcode file to link in. @@ -133,14 +193,156 @@ class CodeGenOptions : public CodeGenOptionsBase { unsigned LinkFlags = 0; }; + /// The files specified here are linked in to the module before optimizations. + std::vector LinkBitcodeFiles; + + /// The user provided name for the "main file", if non-empty. This is useful + /// in situations where the input file name does not match the original input + /// file, for example with -save-temps. + std::string MainFileName; + + /// The name for the split debug info file used for the DW_AT_[GNU_]dwo_name + /// attribute in the skeleton CU. + std::string SplitDwarfFile; + + /// Output filename for the split debug info, not used in the skeleton CU. + std::string SplitDwarfOutput; + + /// The name of the relocation model to use. + llvm::Reloc::Model RelocationModel; + + /// The thread model to use + std::string ThreadModel; + + /// If not an empty string, trap intrinsics are lowered to calls to this + /// function instead of to trap instructions. + std::string TrapFuncName; + + /// A list of dependent libraries. + std::vector DependentLibraries; + + /// A list of linker options to embed in the object file. + std::vector LinkerOptions; + + /// Name of the profile file to use as output for -fprofile-instr-generate, + /// -fprofile-generate, and -fcs-profile-generate. + std::string InstrProfileOutput; + + /// Name of the profile file to use with -fprofile-sample-use. 
+ std::string SampleProfileFile; + + /// Name of the profile file to use as input for -fprofile-instr-use + std::string ProfileInstrumentUsePath; + + /// Name of the profile remapping file to apply to the profile data supplied + /// by -fprofile-sample-use or -fprofile-instr-use. + std::string ProfileRemappingFile; + + /// Name of the function summary index file to use for ThinLTO function + /// importing. + std::string ThinLTOIndexFile; + + /// Name of a file that can optionally be written with minimized bitcode + /// to be used as input for the ThinLTO thin link step, which only needs + /// the summary and module symbol table (and not, e.g. any debug metadata). + std::string ThinLinkBitcodeFile; + + /// Prefix to use for -save-temps output. + std::string SaveTempsFilePrefix; + + /// Name of file passed with -fcuda-include-gpubinary option to forward to + /// CUDA runtime back-end for incorporating them into host-side object file. + std::string CudaGpuBinaryFileName; + + /// The name of the file to which the backend should save YAML optimization + /// records. + std::string OptRecordFile; + + /// The regex that filters the passes that should be saved to the optimization + /// records. + std::string OptRecordPasses; + + /// The format used for serializing remarks (default: YAML) + std::string OptRecordFormat; + + /// The name of the partition that symbols are assigned to, specified with + /// -fsymbol-partition (see https://lld.llvm.org/Partitions.html). + std::string SymbolPartition; + + /// Regular expression to select optimizations for which we should enable + /// optimization remarks. Transformation passes whose name matches this + /// expression (and support this feature), will emit a diagnostic + /// whenever they perform a transformation. This is enabled by the + /// -Rpass=regexp flag. + std::shared_ptr OptimizationRemarkPattern; + + /// Regular expression to select optimizations for which we should enable + /// missed optimization remarks. 
Transformation passes whose name matches this + /// expression (and support this feature), will emit a diagnostic + /// whenever they tried but failed to perform a transformation. This is + /// enabled by the -Rpass-missed=regexp flag. + std::shared_ptr OptimizationRemarkMissedPattern; + + /// Regular expression to select optimizations for which we should enable + /// optimization analyses. Transformation passes whose name matches this + /// expression (and support this feature), will emit a diagnostic + /// whenever they want to explain why they decided to apply or not apply + /// a given transformation. This is enabled by the -Rpass-analysis=regexp + /// flag. + std::shared_ptr OptimizationRemarkAnalysisPattern; + + /// Set of files defining the rules for the symbol rewriting. + std::vector RewriteMapFiles; + + /// Set of sanitizer checks that are non-fatal (i.e. execution should be + /// continued when possible). + SanitizerSet SanitizeRecover; + + /// Set of sanitizer checks that trap rather than diagnose. + SanitizerSet SanitizeTrap; + + /// List of backend command-line options for -fembed-bitcode. + std::vector CmdArgs; + + /// A list of all -fno-builtin-* function names (e.g., memset). + std::vector NoBuiltinFuncs; + + std::vector Reciprocals; + + /// The preferred width for auto-vectorization transforms. This is intended to + /// override default transforms based on the width of the architected vector + /// registers. + std::string PreferVectorWidth; + + /// Set of XRay instrumentation kinds to emit. + XRayInstrSet XRayInstrumentationBundle; + + std::vector DefaultFunctionAttrs; + + /// List of dynamic shared object files to be loaded as pass plugins. + std::vector PassPlugins; + + /// Path to allowlist file specifying which objects + /// (files, functions) should exclusively be instrumented + /// by sanitizer coverage pass. 
+ std::vector SanitizeCoverageAllowlistFiles; + + /// Path to blocklist file specifying which objects + /// (files, functions) listed for instrumentation by sanitizer + /// coverage pass should actually not be instrumented. + std::vector SanitizeCoverageBlocklistFiles; + + /// Executable and command-line used to create a given CompilerInvocation. + /// Most of the time this will be the full -cc1 command. + const char *Argv0 = nullptr; + ArrayRef CommandLineArgs; public: // Define accessors/mutators for code generation options of enumeration type. #define CODEGENOPT(Name, Bits, Default) -#define ENUM_CODEGENOPT(Name, Type, Bits, Default) \ - Type get##Name() const { return static_cast(Name); } \ +#define ENUM_CODEGENOPT(Name, Type, Bits, Default) \ + Type get##Name() const { return static_cast(Name); } \ void set##Name(Type Value) { Name = static_cast(Value); } -#define TYPED_CODEGENOPT(Type, Name, Description) Type Name; #include "clang/Basic/CodeGenOptions.def" CodeGenOptions(); diff --git a/clang/include/clang/Basic/CommentOptions.def b/clang/include/clang/Basic/CommentOptions.def deleted file mode 100644 index 537f9eb34bd43..0000000000000 --- a/clang/include/clang/Basic/CommentOptions.def +++ /dev/null @@ -1,26 +0,0 @@ -//===--- CommentOptions.def - Comment option database -------------*- C++ -//-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the comment options. Users of this file must -// define the TYPED_COMMENTOPT macro to make use of this information. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_COMMENTOPT -#define TYPED_COMMENTOPT(Type, Name, Description) -#endif - -TYPED_COMMENTOPT(BlockCommandNamesTy, BlockCommandNames, - "Command names to treat as vlock commands in comments. Should " - "not include the leading backslash.") - -TYPED_COMMENTOPT(bool, ParseAllComments, - "Treat ordinary comments as documentation comments") - -#undef TYPED_COMMENTOPT diff --git a/clang/include/clang/Basic/CommentOptions.h b/clang/include/clang/Basic/CommentOptions.h index 149650e6192a4..7d142fc32f511 100644 --- a/clang/include/clang/Basic/CommentOptions.h +++ b/clang/include/clang/Basic/CommentOptions.h @@ -23,10 +23,14 @@ namespace clang { struct CommentOptions { using BlockCommandNamesTy = std::vector; -#define TYPED_COMMENTOPT(Type, Name, Description) Type Name; -#include "clang/Basic/CommentOptions.def" + /// Command names to treat as block commands in comments. + /// Should not include the leading backslash. + BlockCommandNamesTy BlockCommandNames; - CommentOptions() : ParseAllComments(false) {} + /// Treat ordinary comments as documentation comments. + bool ParseAllComments = false; + + CommentOptions() = default; }; } // namespace clang diff --git a/clang/include/clang/Basic/DiagnosticOptions.def b/clang/include/clang/Basic/DiagnosticOptions.def index 35b01b8c5ce04..a946b5c6be8ef 100644 --- a/clang/include/clang/Basic/DiagnosticOptions.def +++ b/clang/include/clang/Basic/DiagnosticOptions.def @@ -43,10 +43,6 @@ DIAGOPT(Name, Bits, Default) ENUM_DIAGOPT(Name, Type, Bits, Default) #endif -#ifndef TYPED_DIAGOPT -#define TYPED_DIAGOPT(Type, Name, Description) -#endif - SEMANTIC_DIAGOPT(IgnoreWarnings, 1, 0) /// -w DIAGOPT(NoRewriteMacros, 1, 0) /// -Wno-rewrite-macros DIAGOPT(Pedantic, 1, 0) /// -pedantic @@ -99,32 +95,9 @@ VALUE_DIAGOPT(TabStop, 32, DefaultTabStop) /// The distance between tab stops. /// Column limit for formatting message diagnostics, or 0 if unused. 
VALUE_DIAGOPT(MessageLength, 32, 0) -TYPED_DIAGOPT(std::string, DiagnosticLogFile, - "The file to log diagnostic output to.") - -TYPED_DIAGOPT(std::string, DiagnosticSerializationFile, - "The file to serialize diagnostics to (non-appending).") - -TYPED_DIAGOPT(std::vector, Warnings, - "The list of -W... options used to alter the diagnostic " - "mappings, with the prefixes removed.") - -TYPED_DIAGOPT(std::vector, UndefPrefixes, - "The list of prefixes from -Wundef-prefix=... used to generate " - "warnings for undefined macros.") - -TYPED_DIAGOPT(std::vector, Remarks, - "The list of -R... options used to alter the diagnostic " - "mappings, with the prefixes removed.") - -TYPED_DIAGOPT(std::vector, VerifyPrefixes, - "The prefixes for comment directives sought by -verify " - "(\"expected\" by /// default).") - #undef DIAGOPT #undef ENUM_DIAGOPT #undef VALUE_DIAGOPT #undef SEMANTIC_DIAGOPT #undef SEMANTIC_ENUM_DIAGOPT #undef SEMANTIC_VALUE_DIAGOPT -#undef TYPED_DIAGOPT diff --git a/clang/include/clang/Basic/DiagnosticOptions.h b/clang/include/clang/Basic/DiagnosticOptions.h index 2b6bd1fd2be57..7fbe534c5994b 100644 --- a/clang/include/clang/Basic/DiagnosticOptions.h +++ b/clang/include/clang/Basic/DiagnosticOptions.h @@ -88,9 +88,31 @@ class DiagnosticOptions : public RefCountedBase{ #include "clang/Basic/DiagnosticOptions.def" public: -#define TYPED_DIAGOPT(Type, Name, Description) Type Name; + /// The file to log diagnostic output to. + std::string DiagnosticLogFile; + + /// The file to serialize diagnostics to (non-appending). + std::string DiagnosticSerializationFile; + + /// The list of -W... options used to alter the diagnostic mappings, with the + /// prefixes removed. + std::vector Warnings; + + /// The list of prefixes from -Wundef-prefix=... used to generate warnings + /// for undefined macros. + std::vector UndefPrefixes; + + /// The list of -R... options used to alter the diagnostic mappings, with the + /// prefixes removed. 
+ std::vector Remarks; + + /// The prefixes for comment directives sought by -verify ("expected" by + /// default). + std::vector VerifyPrefixes; + +public: + // Define accessors/mutators for diagnostic options of enumeration type. #define DIAGOPT(Name, Bits, Default) -// Define accessors/mutators for diagnostic options of enumeration type. #define ENUM_DIAGOPT(Name, Type, Bits, Default) \ Type get##Name() const { return static_cast(Name); } \ void set##Name(Type Value) { Name = static_cast(Value); } diff --git a/clang/include/clang/Basic/FileSystemOptions.def b/clang/include/clang/Basic/FileSystemOptions.def deleted file mode 100644 index 794e9871998e7..0000000000000 --- a/clang/include/clang/Basic/FileSystemOptions.def +++ /dev/null @@ -1,21 +0,0 @@ -//===--- FileSystemOptions.def - FileSystem option database -----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the FileSystem options. -// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_FILESYSTEMOPT -#error define TYPED_FILESYSTEMOPT macro to hand filesystem options -#endif - -TYPED_FILESYSTEMOPT(std::string, WorkingDir, - "If set, paths are resolved as if the working directory was set " - "to the value of WorkingDir.") - -#undef TYPED_FILESYSTEMOPT diff --git a/clang/include/clang/Basic/FileSystemOptions.h b/clang/include/clang/Basic/FileSystemOptions.h index 4fd0851145a2b..458af0c7b6592 100644 --- a/clang/include/clang/Basic/FileSystemOptions.h +++ b/clang/include/clang/Basic/FileSystemOptions.h @@ -21,8 +21,9 @@ namespace clang { /// Keeps track of options that affect how file operations are performed. 
class FileSystemOptions { public: -#define TYPED_FILESYSTEMOPT(Type, Name, Description) Type Name; -#include "clang/Basic/FileSystemOptions.def" + /// If set, paths are resolved as if the working directory was + /// set to the value of WorkingDir. + std::string WorkingDir; }; } // end namespace clang diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 55a784196bb9c..3132e76354189 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -78,10 +78,6 @@ COMPATIBLE_VALUE_LANGOPT(Name, Bits, Default, Description) #endif -#ifndef TYPED_LANGOPT -#define TYPED_LANGOPT(Type, Name, Descritpion) -#endif - // FIXME: A lot of the BENIGN_ options should be COMPATIBLE_ instead. LANGOPT(C99 , 1, 0, "C99") LANGOPT(C11 , 1, 0, "C11") @@ -391,73 +387,6 @@ LANGOPT(RelativeCXXABIVTables, 1, 0, LANGOPT(ArmSveVectorBits, 32, 0, "SVE vector size in bits") -TYPED_LANGOPT(SanitizerSet, Sanitize, "Set of enabled sanitizers.") - -TYPED_LANGOPT(std::vector, SanitizerBlacklistFiles, - "Paths to blacklist files specifying which objects (files, " - "functions, variables) should not be instrumented.") - -TYPED_LANGOPT(std::vector, XRayAlwaysInstrumentFiles, - "Paths to the XRay \"always instrument\" files specifying which " - "objects (files, functions, variables) should be imbued with the " - "XRay \"always instrument\" attribute. WARNING: This is a " - "deprecated field and will go away in the future.") - -TYPED_LANGOPT(std::vector, XRayNeverInstrumentFiles, - "Paths to the XRay \"never instrument\" files specifying which " - "objects (files, functions, variables) should be imbued with the " - "XRay \"never instrument\" attribute. 
WARNING: This is a " - "deprecated field and will go away in the future.") - -TYPED_LANGOPT(std::vector, XRayAttrListFiles, - "Paths to the XRay attribute list files, specifying which " - "objects (files, functions, variables) should be imbued with the " - "appropriate XRay attribute(s).") - -TYPED_LANGOPT(clang::ObjCRuntime, ObjCRuntime, "") - -TYPED_LANGOPT(CoreFoundationABI, CFRuntime, "") - -TYPED_LANGOPT(std::string, ObjCConstantStringClass, "") - -TYPED_LANGOPT( - std::string, OverflowHandler, - "The name of the handler function to be called when -ftrapv is specified. " - "If none is specified, abort (GCC-compatible behaviour).") - -TYPED_LANGOPT( - std::string, ModuleName, - "The module currently being compiled as specified by -fmodule-name.") - -TYPED_LANGOPT( - std::string, CurrentModule, - "The name of the current module, of which the main source file is a part. " - "If CompilingModule is set, we are compiling the interface of this module, " - "otherwise we are compiling an implementation file of it. This starts as " - "ModuleName in case -fmodule-name is provided and changes during " - "compilation to reflect the current module.") - -TYPED_LANGOPT(std::vector, ModuleFeatures, - "The names of any features to enable in module 'requires' decls " - "in addition to the hard-coded list in Module.cpp and the target " - "features. This list is sorted.") - -TYPED_LANGOPT(std::vector, NoBuiltinFuncs, - "A list of all -fno-builtin-* function names (e.g., memset).") - -TYPED_LANGOPT( - std::vector, OMPTargetTriples, - "Triples of the OpenMP targets that the host code codegen should take into " - "account in order to generate accurate offloading descriptors.") - -TYPED_LANGOPT(std::string, OMPHostIRFile, - "Name of the IR file that contains the result of the OpenMP " - "target host code generation.") - -TYPED_LANGOPT(bool, IsHeaderFile, - "Indicates whether the front-end is explicitly told that the " - "input is a header file (i.e. 
-x c-header).") - #undef LANGOPT #undef COMPATIBLE_LANGOPT #undef BENIGN_LANGOPT @@ -467,4 +396,3 @@ TYPED_LANGOPT(bool, IsHeaderFile, #undef VALUE_LANGOPT #undef COMPATIBLE_VALUE_LANGOPT #undef BENIGN_VALUE_LANGOPT -#undef TYPED_LANGOPT diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 6af97fd5b2a79..4e277435bf8fc 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -227,12 +227,75 @@ class LangOptions : public LangOptionsBase { }; public: + /// Set of enabled sanitizers. + SanitizerSet Sanitize; + + /// Paths to blacklist files specifying which objects + /// (files, functions, variables) should not be instrumented. + std::vector SanitizerBlacklistFiles; + + /// Paths to the XRay "always instrument" files specifying which + /// objects (files, functions, variables) should be imbued with the XRay + /// "always instrument" attribute. + /// WARNING: This is a deprecated field and will go away in the future. + std::vector XRayAlwaysInstrumentFiles; + + /// Paths to the XRay "never instrument" files specifying which + /// objects (files, functions, variables) should be imbued with the XRay + /// "never instrument" attribute. + /// WARNING: This is a deprecated field and will go away in the future. + std::vector XRayNeverInstrumentFiles; + + /// Paths to the XRay attribute list files, specifying which objects + /// (files, functions, variables) should be imbued with the appropriate XRay + /// attribute(s). + std::vector XRayAttrListFiles; + + clang::ObjCRuntime ObjCRuntime; + + CoreFoundationABI CFRuntime = CoreFoundationABI::Unspecified; + + std::string ObjCConstantStringClass; + + /// The name of the handler function to be called when -ftrapv is + /// specified. + /// + /// If none is specified, abort (GCC-compatible behaviour). + std::string OverflowHandler; + + /// The module currently being compiled as specified by -fmodule-name. 
+ std::string ModuleName; + + /// The name of the current module, of which the main source file + /// is a part. If CompilingModule is set, we are compiling the interface + /// of this module, otherwise we are compiling an implementation file of + /// it. This starts as ModuleName in case -fmodule-name is provided and + /// changes during compilation to reflect the current module. + std::string CurrentModule; + + /// The names of any features to enable in module 'requires' decls + /// in addition to the hard-coded list in Module.cpp and the target features. + /// + /// This list is sorted. + std::vector ModuleFeatures; + /// Options for parsing comments. CommentOptions CommentOpts; -#define LANGOPT(Name, Bits, Default, Description) -#define TYPED_LANGOPT(Type, Name, Description) Type Name; -#include "clang/Basic/LangOptions.def" + /// A list of all -fno-builtin-* function names (e.g., memset). + std::vector NoBuiltinFuncs; + + /// Triples of the OpenMP targets that the host code codegen should + /// take into account in order to generate accurate offloading descriptors. + std::vector OMPTargetTriples; + + /// Name of the IR file that contains the result of the OpenMP target + /// host code generation. + std::string OMPHostIRFile; + + /// Indicates whether the front-end is explicitly told that the + /// input is a header file (i.e. -x c-header). + bool IsHeaderFile = false; LangOptions(); diff --git a/clang/include/clang/Basic/TargetOptions.def b/clang/include/clang/Basic/TargetOptions.def deleted file mode 100644 index 33e746f012cea..0000000000000 --- a/clang/include/clang/Basic/TargetOptions.def +++ /dev/null @@ -1,88 +0,0 @@ -//===--- TargetOptions.def - Target option database -------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the target options. Users of this file must -// define the TYPED_TARGETOPT macro to make use of this information. -// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_TARGETOPT -#error Define the TYPED_TARGETOPT macro to handle target options -#endif - -TYPED_TARGETOPT(std::string, Triple, - "The name of the target triple to compile for.") - -TYPED_TARGETOPT( - std::string, HostTriple, - "When compiling for the device side, contains the triple used to " - "compile for the host.") - -TYPED_TARGETOPT(std::string, CPU, - "If given, the name of the target CPU to generate code for.") - -TYPED_TARGETOPT(std::string, TuneCPU, - "If given, the name of the target CPU to tune code for.") - -TYPED_TARGETOPT(std::string, FPMath, - "If given, the unit to use for floating point math.") - -TYPED_TARGETOPT(std::string, ABI, - "If given, the name of the target ABI to use.") - -TYPED_TARGETOPT(llvm::EABI, EABIVersion, "The EABI version to use.") - -TYPED_TARGETOPT(std::string, LinkerVersion, - "If given, the version string of the linker in use.") - -TYPED_TARGETOPT(std::vector, FeaturesAsWritten, - "The list of target specific features to enable or disable, as " - "written on the command line.") - -TYPED_TARGETOPT( - std::vector, Features, - "The list of target specific features to enable or disable -- this " - "should be a list of strings starting with by '+' or '-'.") - -TYPED_TARGETOPT(llvm::StringMap, FeatureMap, - "The map of which features have been enabled disabled based on " - "the command line.") - -TYPED_TARGETOPT(OpenCLOptions, SupportedOpenCLOptions, - "Supported OpenCL extensions and optional core features.") - -TYPED_TARGETOPT( - std::vector, OpenCLExtensionsAsWritten, - "The list of OpenCL extensions to enable or disable, as written on " - "the 
command line.") - -TYPED_TARGETOPT( - bool, ForceEnableInt128, - "If given, enables support for __int128_t and __uint128_t types.") - -TYPED_TARGETOPT( - bool, NVPTXUseShortPointers, - "If enabled, use 32-bit pointers for accessing const/local/shared " - "address space.") - -TYPED_TARGETOPT( - std::string, CodeModel, - "The code model to be used as specified by the user. Corresponds to " - "CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, " - "plus \"default\" for the case when the user has not explicitly " - "specified a code model.") - -TYPED_TARGETOPT( - llvm::VersionTuple, SDKVersion, - "The version of the SDK which was used during the compilation. The option " - "is used for two different purposes. On Darwin the version is propagated " - "to LLVM where it's used to support SDK Version metadata (See D55673). " - "CUDA compilation uses it to control parts of CUDA compilation in clang " - "that depend on specific version of the CUDA SDK.") - -#undef TYPED_TARGETOPT diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h index 1771c3bdbb611..d1cc024957dae 100644 --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -25,9 +25,69 @@ namespace clang { /// Options for controlling the target. class TargetOptions { public: -#define TYPED_TARGETOPT(Type, Name, Description) Type Name; -#include "clang/Basic/TargetOptions.def" - TargetOptions() : ForceEnableInt128(false), NVPTXUseShortPointers(false) {} + /// The name of the target triple to compile for. + std::string Triple; + + /// When compiling for the device side, contains the triple used to compile + /// for the host. + std::string HostTriple; + + /// If given, the name of the target CPU to generate code for. + std::string CPU; + + /// If given, the name of the target CPU to tune code for. + std::string TuneCPU; + + /// If given, the unit to use for floating point math. 
+ std::string FPMath; + + /// If given, the name of the target ABI to use. + std::string ABI; + + /// The EABI version to use + llvm::EABI EABIVersion; + + /// If given, the version string of the linker in use. + std::string LinkerVersion; + + /// The list of target specific features to enable or disable, as written on the command line. + std::vector FeaturesAsWritten; + + /// The list of target specific features to enable or disable -- this should + /// be a list of strings starting with by '+' or '-'. + std::vector Features; + + /// The map of which features have been enabled disabled based on the command + /// line. + llvm::StringMap FeatureMap; + + /// Supported OpenCL extensions and optional core features. + OpenCLOptions SupportedOpenCLOptions; + + /// The list of OpenCL extensions to enable or disable, as written on + /// the command line. + std::vector OpenCLExtensionsAsWritten; + + /// If given, enables support for __int128_t and __uint128_t types. + bool ForceEnableInt128 = false; + + /// \brief If enabled, use 32-bit pointers for accessing const/local/shared + /// address space. + bool NVPTXUseShortPointers = false; + + // The code model to be used as specified by the user. Corresponds to + // CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus + // "default" for the case when the user has not explicitly specified a + // code model. + std::string CodeModel; + + /// The version of the SDK which was used during the compilation. + /// The option is used for two different purposes: + /// * on darwin the version is propagated to LLVM where it's used + /// to support SDK Version metadata (See D55673). + /// * CUDA compilation uses it to control parts of CUDA compilation + /// in clang that depend on specific version of the CUDA SDK. 
+ llvm::VersionTuple SDKVersion; }; } // end namespace clang diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index c8a95ae69d72b..c723fc084c854 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -19,10 +19,9 @@ #include "clang/Frontend/FrontendOptions.h" #include "clang/Frontend/MigratorOptions.h" #include "clang/Frontend/PreprocessorOutputOptions.h" -#include "clang/Sema/CodeCompleteOptions.h" #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/ArrayRef.h" #include #include @@ -87,9 +86,6 @@ class CompilerInvocationBase { LangOptions *getLangOpts() { return LangOpts.get(); } const LangOptions *getLangOpts() const { return LangOpts.get(); } - CommentOptions &getCommentOpts() { return LangOpts->CommentOpts; } - const CommentOptions &getCommentOpts() const { return LangOpts->CommentOpts; } - TargetOptions &getTargetOpts() { return *TargetOpts.get(); } const TargetOptions &getTargetOpts() const { return *TargetOpts.get(); } @@ -231,14 +227,6 @@ class CompilerInvocation : public CompilerInvocationBase { FrontendOptions &getFrontendOpts() { return FrontendOpts; } const FrontendOptions &getFrontendOpts() const { return FrontendOpts; } - CodeCompleteOptions &getCodeCompleteOpts() { - return FrontendOpts.CodeCompleteOpts; - } - - const CodeCompleteOptions &getCodeCompleteOpts() const { - return FrontendOpts.CodeCompleteOpts; - } - PreprocessorOutputOptions &getPreprocessorOutputOpts() { return PreprocessorOutputOpts; } diff --git a/clang/include/clang/Frontend/DependencyOutputOptions.def b/clang/include/clang/Frontend/DependencyOutputOptions.def deleted file mode 100644 index 0fbd90226f11e..0000000000000 --- a/clang/include/clang/Frontend/DependencyOutputOptions.def +++ /dev/null @@ -1,50 +0,0 @@ -//===--- DependencyOutputOptions.def 
-------------------------------C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the DependencyOutput options, to use this file one needs -// to define the TYPED_DEPENDENCY_OUTPUTOPT and/or the DEPENDECY_OUTPUTOPT macro -// to get more information about bitfields. -// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_DEPENDENCY_OUTPUTOPT -#define TYPED_DEPENDENCY_OUTPUTOPT(Type, Name, Description) -#endif - -#ifndef DEPENDENCY_OUTPUTOPT -#define DEPENDENCY_OUTPUTOPT(Name, Bits, Description) \ - TYPED_DEPENDENCY_OUTPUTOPT(unsigned, Name, Description) -#endif - -DEPENDENCY_OUTPUTOPT(IncludeSystemHeaders, 1, "Include system header dependencies.") -DEPENDENCY_OUTPUTOPT(ShowHeaderIncludes, 1, "Show header inclusions (-H).") -DEPENDENCY_OUTPUTOPT(UsePhonyTargets, 1, "Include phony targets for each dependency, which can " - "avoid some 'make' problems.") -DEPENDENCY_OUTPUTOPT(AddMissingHeaderDeps, 1, "Add missing headers to dependency list.") -DEPENDENCY_OUTPUTOPT(IncludeModuleFiles, 1, "Include module file dependencies.") - -TYPED_DEPENDENCY_OUTPUTOPT(ShowIncludesDestination, ShowIncludesDest, "Destination of cl.exe style /showIncludes info.") - -TYPED_DEPENDENCY_OUTPUTOPT(DependencyOutputFormat, OutputFormat, "The format for the dependency file") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, OutputFile, "The file to write dependency output to.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, HeaderIncludeOutputFile, "The file to write header include output to. This is orthogonal to ShowHeaderIncludes (-H) and will include headers mentioned in the predefines buffer. 
If the output file is \"-\", output will be sent to stderr.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::vector, Targets, "A list of names to use as the targets in the dependency file; this list must contain at least one entry.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::vector, ExtraDeps, "A list of filenames to be used as extra dependencies for every target.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, ShowIncludesPretendHeader, "In /showIncludes mode, pretend the main TU is a header with this name.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, DOTOutputFile, "The file to write GraphViz-formatted header dependencies to.") - -TYPED_DEPENDENCY_OUTPUTOPT(std::string, ModuleDependencyOutputDir, "The directory to copy module dependencies to when collecting them.") - -#undef TYPED_DEPENDENCY_OUTPUTOPT -#undef DEPENDENCY_OUTPUTOPT \ No newline at end of file diff --git a/clang/include/clang/Frontend/DependencyOutputOptions.h b/clang/include/clang/Frontend/DependencyOutputOptions.h index 581e9b5a544b8..7a4f3337936fc 100644 --- a/clang/include/clang/Frontend/DependencyOutputOptions.h +++ b/clang/include/clang/Frontend/DependencyOutputOptions.h @@ -24,15 +24,49 @@ enum class DependencyOutputFormat { Make, NMake }; /// file generation. class DependencyOutputOptions { public: -#define TYPED_DEPENDENCY_OUTPUTOPT(Type, Name, Description) Type Name; -#define DEPENDENCY_OUTPUTOPT(Name, Bits, Description) unsigned Name : Bits; -#include "clang/Frontend/DependencyOutputOptions.def" + unsigned IncludeSystemHeaders : 1; ///< Include system header dependencies. + unsigned ShowHeaderIncludes : 1; ///< Show header inclusions (-H). + unsigned UsePhonyTargets : 1; ///< Include phony targets for each + /// dependency, which can avoid some 'make' + /// problems. + unsigned AddMissingHeaderDeps : 1; ///< Add missing headers to dependency list + unsigned IncludeModuleFiles : 1; ///< Include module file dependencies. + + /// Destination of cl.exe style /showIncludes info. 
+ ShowIncludesDestination ShowIncludesDest = ShowIncludesDestination::None; + + /// The format for the dependency file. + DependencyOutputFormat OutputFormat = DependencyOutputFormat::Make; + + /// The file to write dependency output to. + std::string OutputFile; + + /// The file to write header include output to. This is orthogonal to + /// ShowHeaderIncludes (-H) and will include headers mentioned in the + /// predefines buffer. If the output file is "-", output will be sent to + /// stderr. + std::string HeaderIncludeOutputFile; + + /// A list of names to use as the targets in the dependency file; this list + /// must contain at least one entry. + std::vector Targets; + + /// A list of filenames to be used as extra dependencies for every target. + std::vector ExtraDeps; + + /// In /showIncludes mode, pretend the main TU is a header with this name. + std::string ShowIncludesPretendHeader; + + /// The file to write GraphViz-formatted header dependencies to. + std::string DOTOutputFile; + + /// The directory to copy module dependencies to when collecting them. + std::string ModuleDependencyOutputDir; + public: DependencyOutputOptions() : IncludeSystemHeaders(0), ShowHeaderIncludes(0), UsePhonyTargets(0), - AddMissingHeaderDeps(0), IncludeModuleFiles(0), - ShowIncludesDest(ShowIncludesDestination::None), - OutputFormat(DependencyOutputFormat::Make) {} + AddMissingHeaderDeps(0), IncludeModuleFiles(0) {} }; } // end namespace clang diff --git a/clang/include/clang/Frontend/FrontendOptions.def b/clang/include/clang/Frontend/FrontendOptions.def deleted file mode 100644 index c6188d9cf0255..0000000000000 --- a/clang/include/clang/Frontend/FrontendOptions.def +++ /dev/null @@ -1,179 +0,0 @@ -//===--- FrontendOptions.def - FileSystem option database -----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the Frontend options. -// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_FRONTENDOPT -#define TYPED_FRONTENDOPT(Type, Name, Description) -#endif - -#ifndef FRONTENDOPT -#define FRONTENDOPT(Name, Bits, Description) \ - TYPED_FRONTENDOPT(unsigned, Name, Description) -#endif - -FRONTENDOPT(DisableFree, 1, "Disable memory freeing on exit.") - -FRONTENDOPT(RelocatablePCH, 1, - "When generating PCH files, instruct the AST writer to create " - "relocatable PCH files.") - -FRONTENDOPT(ShowHelp, 1, "Show the -help text.") - -FRONTENDOPT(ShowStats, 1, "Show frontend performance metrics and statistics.") - -FRONTENDOPT(ShowTimers, 1, "Show timers for individual actions.") - -FRONTENDOPT(PrintSupportedCPUs, 1, - "print the supported cpus for the current target") - -FRONTENDOPT(TimeTrace, 1, "Output time trace profile.") - -FRONTENDOPT(ShowVersion, 1, "Show the -version text.") - -FRONTENDOPT(FixWhatYouCan, 1, "Apply fixes even if there are unfixable errors.") - -FRONTENDOPT(FixOnlyWarnings, 1, "Apply fixes only for warnings.") - -FRONTENDOPT(FixAndRecompile, 1, "Apply fixes and recompile.") - -FRONTENDOPT(FixToTemporaries, 1, "Apply fixes to temporary files.") - -FRONTENDOPT(ARCMTAction, 3, "") - -FRONTENDOPT(ARCMTMigrateEmitARCErrors, 1, - "Emit ARC errors even if the migrator can fix them.") - -FRONTENDOPT(SkipFunctionBodies, 1, - "Skip over function bodies to speed up parsing in cases you do not " - "need them (e.g. 
with code completion).") - -FRONTENDOPT(UseGlobalModuleIndex, 1, - "Whether we can use the global module index if available.") - -FRONTENDOPT(GenerateGlobalModuleIndex, 1, - "Whether we can generate the global module index if needed.") - -FRONTENDOPT(ASTDumpDecls, 1, - "Whether we include declaration dumps in AST dumps.") - -FRONTENDOPT(ASTDumpAll, 1, - "Whether we deserialize all decls when forming AST dumps.") - -FRONTENDOPT(ASTDumpLookups, 1, - "Whether we include lookup table dumps in AST dumps.") - -FRONTENDOPT(ASTDumpDeclTypes, 1, - "Whether we include declaration type dumps in AST dumps.") - -FRONTENDOPT(BuildingImplicitModule, 1, - "Whether we are performing an implicit module build.") - -FRONTENDOPT(ModulesEmbedAllFiles, 1, - "Whether we should embed all used files into the PCM file.") - -FRONTENDOPT(IncludeTimestamps, 1, - "Whether timestamps should be written to the produced PCH file.") - -FRONTENDOPT(UseTemporary, 1, - "Should a temporary file be used during compilation.") - -FRONTENDOPT(IsSystemModule, 1, - "When using -emit-module, treat the modulemap as a system module.") - -TYPED_FRONTENDOPT(ASTDumpOutputFormat, ASTDumpFormat, - "Specifies the output format of the AST.") - -TYPED_FRONTENDOPT(unsigned, ObjCMTAction, "") - -TYPED_FRONTENDOPT(std::string, ObjCMTWhiteListPath, "") - -TYPED_FRONTENDOPT(std::string, MTMigrateDir, "") - -TYPED_FRONTENDOPT(std::string, ARCMTMigrateReportOut, "") - -TYPED_FRONTENDOPT(InputsTy, Inputs, "The input files and their types.") - -TYPED_FRONTENDOPT( - std::string, OriginalModuleMap, - "When the input is a module map, the original module map file from which " - "that map was inferred, if any (for umbrella modules).") - -TYPED_FRONTENDOPT(std::string, OutputFile, "The output file, if any.") - -TYPED_FRONTENDOPT(std::string, FixItSuffix, - "If given, the new suffix for fix-it rewritten files.") - -TYPED_FRONTENDOPT(std::string, ASTDumpFilter, - "If given, filter dumped AST Decl nodes by this substring.") - 
-TYPED_FRONTENDOPT(ParsedSourceLocation, CodeCompletionAt, - "If given, enable code completion at the provided location.") - -TYPED_FRONTENDOPT(frontend::ActionKind, ProgramAction, - "The frontend action to perform.") - -TYPED_FRONTENDOPT(std::string, ActionName, - "The name of the action to run when using a plugin action.") - -TYPED_FRONTENDOPT(PluginArgsTy, PluginArgs, "Args to pass to the plugins") - -TYPED_FRONTENDOPT( - std::vector, AddPluginActions, - "The list of plugin actions to run in addition to the normal action.") - -TYPED_FRONTENDOPT(std::vector, Plugins, - "The list of plugins to load.") - -TYPED_FRONTENDOPT(std::vector>, - ModuleFileExtensions, "The list of module file extensions.") - -TYPED_FRONTENDOPT( - std::vector, ModuleMapFiles, - "The list of module map files to load before processing the input.") - -TYPED_FRONTENDOPT(std::vector, ModuleFiles, - "The list of additional prebuilt module files to load before " - "processing the input.") - -TYPED_FRONTENDOPT(std::vector, ModulesEmbedFiles, - "The list of files to embed into the compiled module file.") - -TYPED_FRONTENDOPT(std::vector, ASTMergeFiles, - "The list of AST files to merge.") - -TYPED_FRONTENDOPT( - std::vector, LLVMArgs, - "A list of arguments to forward to LLVM's option processing; this should " - "only be used for debugging and experimental features.") - -TYPED_FRONTENDOPT(std::string, OverrideRecordLayoutsFile, - "File name of the file that will provide record layouts (in " - "the format produced by -fdump-record-layouts).") - -TYPED_FRONTENDOPT(std::string, AuxTriple, - "Auxiliary triple for CUDA/HIP compilation.") - -TYPED_FRONTENDOPT(Optional, AuxTargetCPU, - "Auxiliary target CPU for CUDA/HIP compilation.") - -TYPED_FRONTENDOPT(Optional>, AuxTargetFeatures, - "Auxiliary target features for CUDA/HIP compilation.") - -TYPED_FRONTENDOPT(std::string, StatsFile, "Filename to write statistics to.") - -TYPED_FRONTENDOPT( - unsigned, TimeTraceGranularity, - "Minimum time granularity (in 
microseconds) traced by time profiler.") - -TYPED_FRONTENDOPT(InputKind, DashX, "Input Kind") - -#undef TYPED_FRONTENDOPT -#undef FRONTENDOPT \ No newline at end of file diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 5dccdf50ca462..b2be33032c08d 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -226,14 +226,94 @@ class FrontendInputFile { /// FrontendOptions - Options for controlling the behavior of the frontend. class FrontendOptions { public: - using PluginArgsTy = - std::unordered_map>; + /// Disable memory freeing on exit. + unsigned DisableFree : 1; - using InputsTy = llvm::SmallVector; + /// When generating PCH files, instruct the AST writer to create relocatable + /// PCH files. + unsigned RelocatablePCH : 1; + + /// Show the -help text. + unsigned ShowHelp : 1; + + /// Show frontend performance metrics and statistics. + unsigned ShowStats : 1; + + /// Show timers for individual actions. + unsigned ShowTimers : 1; + + /// print the supported cpus for the current target + unsigned PrintSupportedCPUs : 1; + + /// Output time trace profile. + unsigned TimeTrace : 1; + + /// Show the -version text. + unsigned ShowVersion : 1; + + /// Apply fixes even if there are unfixable errors. + unsigned FixWhatYouCan : 1; + + /// Apply fixes only for warnings. + unsigned FixOnlyWarnings : 1; + + /// Apply fixes and recompile. + unsigned FixAndRecompile : 1; + + /// Apply fixes to temporary files. + unsigned FixToTemporaries : 1; + + /// Emit ARC errors even if the migrator can fix them. + unsigned ARCMTMigrateEmitARCErrors : 1; + + /// Skip over function bodies to speed up parsing in cases you do not need + /// them (e.g. with code completion). + unsigned SkipFunctionBodies : 1; + + /// Whether we can use the global module index if available. 
+ unsigned UseGlobalModuleIndex : 1; + + /// Whether we can generate the global module index if needed. + unsigned GenerateGlobalModuleIndex : 1; + + /// Whether we include declaration dumps in AST dumps. + unsigned ASTDumpDecls : 1; + + /// Whether we deserialize all decls when forming AST dumps. + unsigned ASTDumpAll : 1; + + /// Whether we include lookup table dumps in AST dumps. + unsigned ASTDumpLookups : 1; + + /// Whether we include declaration type dumps in AST dumps. + unsigned ASTDumpDeclTypes : 1; + + /// Whether we are performing an implicit module build. + unsigned BuildingImplicitModule : 1; + + /// Whether we should embed all used files into the PCM file. + unsigned ModulesEmbedAllFiles : 1; + + /// Whether timestamps should be written to the produced PCH file. + unsigned IncludeTimestamps : 1; + + /// Should a temporary file be used during compilation. + unsigned UseTemporary : 1; + + /// When using -emit-module, treat the modulemap as a system module. + unsigned IsSystemModule : 1; CodeCompleteOptions CodeCompleteOpts; - enum { ARCMT_None, ARCMT_Check, ARCMT_Modify, ARCMT_Migrate }; + /// Specifies the output format of the AST. + ASTDumpOutputFormat ASTDumpFormat = ADOF_Default; + + enum { + ARCMT_None, + ARCMT_Check, + ARCMT_Modify, + ARCMT_Migrate + } ARCMTAction = ARCMT_None; enum { ObjCMT_None = 0, @@ -280,18 +360,92 @@ class FrontendOptions { /// Enable converting setter/getter expressions to property-dot syntx. 
ObjCMT_PropertyDotSyntax = 0x1000, - ObjCMT_MigrateDecls = - (ObjCMT_ReadonlyProperty | ObjCMT_ReadwriteProperty | - ObjCMT_Annotation | ObjCMT_Instancetype | ObjCMT_NsMacros | - ObjCMT_ProtocolConformance | ObjCMT_NsAtomicIOSOnlyProperty | - ObjCMT_DesignatedInitializer), + ObjCMT_MigrateDecls = (ObjCMT_ReadonlyProperty | ObjCMT_ReadwriteProperty | + ObjCMT_Annotation | ObjCMT_Instancetype | + ObjCMT_NsMacros | ObjCMT_ProtocolConformance | + ObjCMT_NsAtomicIOSOnlyProperty | + ObjCMT_DesignatedInitializer), ObjCMT_MigrateAll = (ObjCMT_Literals | ObjCMT_Subscripting | ObjCMT_MigrateDecls | ObjCMT_PropertyDotSyntax) }; + unsigned ObjCMTAction = ObjCMT_None; + std::string ObjCMTWhiteListPath; + + std::string MTMigrateDir; + std::string ARCMTMigrateReportOut; + + /// The input files and their types. + SmallVector Inputs; + + /// When the input is a module map, the original module map file from which + /// that map was inferred, if any (for umbrella modules). + std::string OriginalModuleMap; + + /// The output file, if any. + std::string OutputFile; + + /// If given, the new suffix for fix-it rewritten files. + std::string FixItSuffix; + + /// If given, filter dumped AST Decl nodes by this substring. + std::string ASTDumpFilter; + + /// If given, enable code completion at the provided location. + ParsedSourceLocation CodeCompletionAt; + + /// The frontend action to perform. + frontend::ActionKind ProgramAction = frontend::ParseSyntaxOnly; + + /// The name of the action to run when using a plugin action. + std::string ActionName; + + /// Args to pass to the plugins + std::unordered_map> PluginArgs; + + /// The list of plugin actions to run in addition to the normal action. + std::vector AddPluginActions; + + /// The list of plugins to load. + std::vector Plugins; + + /// The list of module file extensions. + std::vector> ModuleFileExtensions; + + /// The list of module map files to load before processing the input. 
+ std::vector ModuleMapFiles; + + /// The list of additional prebuilt module files to load before + /// processing the input. + std::vector ModuleFiles; + + /// The list of files to embed into the compiled module file. + std::vector ModulesEmbedFiles; + + /// The list of AST files to merge. + std::vector ASTMergeFiles; + + /// A list of arguments to forward to LLVM's option processing; this + /// should only be used for debugging and experimental features. + std::vector LLVMArgs; + + /// File name of the file that will provide record layouts + /// (in the format produced by -fdump-record-layouts). + std::string OverrideRecordLayoutsFile; + + /// Auxiliary triple for CUDA/HIP compilation. + std::string AuxTriple; + + /// Auxiliary target CPU for CUDA/HIP compilation. + Optional AuxTargetCPU; + + /// Auxiliary target features for CUDA/HIP compilation. + Optional> AuxTargetFeatures; + + /// Filename to write statistics to. + std::string StatsFile; -#define FRONTENDOPT(Name, Bits, Description) unsigned Name : Bits; -#define TYPED_FRONTENDOPT(Type, Name, Description) Type Name; -#include "clang/Frontend/FrontendOptions.def" + /// Minimum time granularity (in microseconds) traced by time profiler. 
+ unsigned TimeTraceGranularity; public: FrontendOptions() @@ -299,14 +453,11 @@ class FrontendOptions { ShowStats(false), ShowTimers(false), TimeTrace(false), ShowVersion(false), FixWhatYouCan(false), FixOnlyWarnings(false), FixAndRecompile(false), FixToTemporaries(false), - ARCMTAction(ARCMT_None), ARCMTMigrateEmitARCErrors(false), - SkipFunctionBodies(false), UseGlobalModuleIndex(true), - GenerateGlobalModuleIndex(true), ASTDumpDecls(false), - ASTDumpLookups(false), BuildingImplicitModule(false), - ModulesEmbedAllFiles(false), IncludeTimestamps(true), - UseTemporary(true), ASTDumpFormat(ADOF_Default), - ObjCMTAction(ObjCMT_None), ProgramAction(frontend::ParseSyntaxOnly), - TimeTraceGranularity(500), DashX() {} + ARCMTMigrateEmitARCErrors(false), SkipFunctionBodies(false), + UseGlobalModuleIndex(true), GenerateGlobalModuleIndex(true), + ASTDumpDecls(false), ASTDumpLookups(false), + BuildingImplicitModule(false), ModulesEmbedAllFiles(false), + IncludeTimestamps(true), UseTemporary(true), TimeTraceGranularity(500) {} /// getInputKindForExtension - Return the appropriate input kind for a file /// extension. For example, "c" would return Language::C. diff --git a/clang/include/clang/Frontend/MigratorOptions.def b/clang/include/clang/Frontend/MigratorOptions.def deleted file mode 100644 index fbbcc6b686fdf..0000000000000 --- a/clang/include/clang/Frontend/MigratorOptions.def +++ /dev/null @@ -1,27 +0,0 @@ -//===--- MigratorOptions.def - Migrator option database ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the migrator options. Users of this file must -// define the TYPED_MIGRATOROPT macro to make use of this information. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_MIGRATOROPT -#define TYPED_MIGRATOROPT(Type, Name, Description) -#endif - -#ifndef MIGRATOROPT -#define MIGRATOROPT(Name, Bits, Description) \ - TYPED_MIGRATOROPT(unsigned, Name, Description) -#endif - -MIGRATOROPT(NoNSAllocReallocError, 1, "") -MIGRATOROPT(NoFinalizeRemoval, 1, "") - -#undef TYPED_MIGRATOROPT -#undef MIGRATOROPT diff --git a/clang/include/clang/Frontend/MigratorOptions.h b/clang/include/clang/Frontend/MigratorOptions.h index f5ee9bba9dec0..cf50ffcf0c4f5 100644 --- a/clang/include/clang/Frontend/MigratorOptions.h +++ b/clang/include/clang/Frontend/MigratorOptions.h @@ -18,10 +18,13 @@ namespace clang { class MigratorOptions { public: -#define MIGRATOROPT(Name, Bits, Description) unsigned Name : Bits; -#include "clang/Frontend/MigratorOptions.def" - - MigratorOptions() : NoNSAllocReallocError(0), NoFinalizeRemoval(0) {} + unsigned NoNSAllocReallocError : 1; + unsigned NoFinalizeRemoval : 1; + MigratorOptions() { + NoNSAllocReallocError = 0; + NoFinalizeRemoval = 0; + } }; + } #endif diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.def b/clang/include/clang/Frontend/PreprocessorOutputOptions.def deleted file mode 100644 index aad2f5eb7294b..0000000000000 --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.def +++ /dev/null @@ -1,46 +0,0 @@ -//=== PreprocessorOutputOptions.def - FileSystem option database -*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the PreprocessorOutput options. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_PREPROCESSOR_OUTPUTOPT -#define TYPED_PREPROCESSOR_OUTPUTOPT(Type, Name, Description) -#endif - -#ifndef PREPROCESSOR_OUTPUTOPT -#define PREPROCESSOR_OUTPUTOPT(Name, Bits, Description) \ - TYPED_PREPROCESSOR_OUTPUTOPT(unsigned, Name, Description) -#endif - -PREPROCESSOR_OUTPUTOPT(ShowCPP, 1, "Print normal preprocessed output.") - -PREPROCESSOR_OUTPUTOPT(ShowComments, 1, "Show comments.") - -PREPROCESSOR_OUTPUTOPT(ShowLineMarkers, 1, "Show \#line markers.") - -PREPROCESSOR_OUTPUTOPT(UseLineDirectives, 1, - "Use \#line instead of GCC-style \# N.") - -PREPROCESSOR_OUTPUTOPT(ShowMacroComments, 1, "Show comments, even in macros.") - -PREPROCESSOR_OUTPUTOPT(ShowMacros, 1, "Print macro definitions.") - -PREPROCESSOR_OUTPUTOPT( - ShowIncludeDirectives, 1, - "Print includes, imports etc. within preprocessed output.") - -PREPROCESSOR_OUTPUTOPT(RewriteIncludes, 1, - "Preprocess include directives only.") - -PREPROCESSOR_OUTPUTOPT(RewriteImports, 1, - "Include contents of transitively-imported modules.") - -#undef TYPED_PREPROCESSOR_OUTPUTOPT -#undef PREPROCESSOR_OUTPUTOPT \ No newline at end of file diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h index ab4f25e394a27..72e5ad1137fb7 100644 --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h +++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h @@ -15,9 +15,15 @@ namespace clang { /// output (e.g., -E). class PreprocessorOutputOptions { public: -#define PREPROCESSOR_OUTPUTOPT(Name, Bits, Description) unsigned Name : Bits; -#define TYPED_PREPROCESSOR_OUTPUTOPT(Type, Name, Description) Type Name; -#include "clang/Frontend/PreprocessorOutputOptions.def" + unsigned ShowCPP : 1; ///< Print normal preprocessed output. + unsigned ShowComments : 1; ///< Show comments. + unsigned ShowLineMarkers : 1; ///< Show \#line markers. 
+ unsigned UseLineDirectives : 1; ///< Use \#line instead of GCC-style \# N. + unsigned ShowMacroComments : 1; ///< Show comments, even in macros. + unsigned ShowMacros : 1; ///< Print macro definitions. + unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output. + unsigned RewriteIncludes : 1; ///< Preprocess include directives only. + unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules. public: PreprocessorOutputOptions() { diff --git a/clang/include/clang/Lex/HeaderSearchOptions.def b/clang/include/clang/Lex/HeaderSearchOptions.def deleted file mode 100644 index 79fd196c8f905..0000000000000 --- a/clang/include/clang/Lex/HeaderSearchOptions.def +++ /dev/null @@ -1,136 +0,0 @@ -//===--- HeaderSearchOptions.def - HeaderSearch option database -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the header search options. Users of this file must -// define the HEADERSEARCHOPT macro to make use of this information. -// -//===----------------------------------------------------------------------===// - -#ifndef HEADERSEARCHOPT -#define HEADERSEARCHOPT(Name, Bits, Description) \ - TYPED_HEADERSEARCHOPT(unsigned, Name, Description) -#endif - -#ifndef TYPED_HEADERSEARCHOPT -#define TYPED_HEADERSEARCHOPT(Type, Name, Description) -#endif - -TYPED_HEADERSEARCHOPT(std::string, Sysroot, - "If non-empty, the directory to use as a \"virtual " - "system root\" for include paths.") - -TYPED_HEADERSEARCHOPT(std::string, ModuleFormat, - "The module/pch container format.") - -HEADERSEARCHOPT(DisableModuleHash, 1, - "Whether we should disable the use of the hash string within " - "the module cache. 
Note: Only used for testing!") - -HEADERSEARCHOPT(ImplicitModuleMaps, 1, - "Implicit module maps. This option is enabld by default when " - "modules is enabled.") - -HEADERSEARCHOPT( - ModuleMapFileHomeIsCwd, 1, - "Set the 'home directory' of a module map file to the current working " - "directory (or the home directory of the module map file that contained " - "the 'extern module' directive importing this module map file if any) " - "rather than the directory containing the module map file. The home " - "directory is where we look for files named in the module map file.") - -HEADERSEARCHOPT(UseBuiltinIncludes, 1, "Include the compiler builtin includes.") - -HEADERSEARCHOPT(UseStandardSystemIncludes, 1, - "Include the system standard include search directories.") - -HEADERSEARCHOPT( - UseStandardCXXIncludes, 1, - "Include the system standard C++ library include search directories.") - -HEADERSEARCHOPT(UseLibcxx, 1, "Use libc++ instead of the default libstdc++.") - -HEADERSEARCHOPT(Verbose, 1, - "Whether header search information should be output as for -v.") - -HEADERSEARCHOPT( - ModulesValidateOncePerBuildSession, 1, - "If true, skip verifying input files used by modules if the module was " - "already verified during this build session (see BuildSessionTimestamp).") - -HEADERSEARCHOPT( - ModulesValidateSystemHeaders, 1, - "Whether to validate system input files when a module is loaded.") - -HEADERSEARCHOPT(ValidateASTInputFilesContent, 1, - "Whether the content of input files should be hashed and used " - "to validate consistency.") - -HEADERSEARCHOPT(UseDebugInfo, 1, - "Whether the module includes debug information (-gmodules).") - -HEADERSEARCHOPT(ModulesValidateDiagnosticOptions, 1, "") - -HEADERSEARCHOPT(ModulesHashContent, 1, "") - -HEADERSEARCHOPT(ModulesStrictContextHash, 1, - "Whether we should include all things that could impact the " - "module in the hash. 
This includes things like the full header " - "search path, and enabled diagnostics.") - -TYPED_HEADERSEARCHOPT(std::vector, UserEntries, - "User specified include entries.") - -TYPED_HEADERSEARCHOPT(std::vector, SystemHeaderPrefixes, - "User-specified system header prefixes.") - -TYPED_HEADERSEARCHOPT(std::string, ResourceDir, - "The directory which holds the compiler resource files " - "(builtin includes, etc.).") - -TYPED_HEADERSEARCHOPT(std::string, ModuleCachePath, - "The directory used for the module cache.") - -TYPED_HEADERSEARCHOPT(std::string, ModuleUserBuildPath, - "The directory used for a user build.") - -TYPED_HEADERSEARCHOPT(PrebuiltModuleFilesTy, PrebuiltModuleFiles, - "The mapping of module names to prebuilt module files.") - -TYPED_HEADERSEARCHOPT(std::vector, PrebuiltModulePaths, - "The directories used to load prebuilt module files.") - -TYPED_HEADERSEARCHOPT( - unsigned, ModuleCachePruneInterval, - "The interval (in seconds) between pruning operations. This operation is " - "expensive, because it requires Clang to walk through the directory " - "structure of the module cache, stat()'ing and removing files. The " - "default value is large, e.g., the operation runs once a week.") - -TYPED_HEADERSEARCHOPT( - unsigned, ModuleCachePruneAfter, - "The time (in seconds) after which an unused module file will be " - "considered unused and will, therefore, be pruned. When the module cache " - "is pruned, any module file that has not been accessed in this many " - "seconds will be removed. The default value is large, e.g., a month, to " - "avoid forcing infrequently-used modules to be regenerated often.") - -TYPED_HEADERSEARCHOPT( - uint64_t, BuildSessionTimestamp, - "The time in seconds when the build session started. 
This time is used " - "by other optimizations in header search and module loading.") - -TYPED_HEADERSEARCHOPT(ModulesIgnoreMacrosTy, ModulesIgnoreMacros, - "The set of macro names that should be ignored for the " - "purposes of computing the module hash.") - -TYPED_HEADERSEARCHOPT( - std::vector, VFSOverlayFiles, - "The set of user-provided virtual filesystem overlay files.") - -#undef HEADERSEARCHOPT -#undef TYPED_HEADERSEARCHOPT diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h index 41a7ca915d794..3af49e1753956 100644 --- a/clang/include/clang/Lex/HeaderSearchOptions.h +++ b/clang/include/clang/Lex/HeaderSearchOptions.h @@ -94,14 +94,125 @@ class HeaderSearchOptions { : Prefix(Prefix), IsSystemHeader(IsSystemHeader) {} }; - using PrebuiltModuleFilesTy = std::map>; + /// If non-empty, the directory to use as a "virtual system root" for include + /// paths. + std::string Sysroot; + + /// User specified include entries. + std::vector UserEntries; + + /// User-specified system header prefixes. + std::vector SystemHeaderPrefixes; + + /// The directory which holds the compiler resource files (builtin includes, + /// etc.). + std::string ResourceDir; - using ModulesIgnoreMacrosTy = - llvm::SmallSetVector; + /// The directory used for the module cache. + std::string ModuleCachePath; + + /// The directory used for a user build. + std::string ModuleUserBuildPath; + + /// The mapping of module names to prebuilt module files. + std::map> PrebuiltModuleFiles; + + /// The directories used to load prebuilt module files. + std::vector PrebuiltModulePaths; + + /// The module/pch container format. + std::string ModuleFormat; + + /// Whether we should disable the use of the hash string within the + /// module cache. + /// + /// Note: Only used for testing! + unsigned DisableModuleHash : 1; + + /// Implicit module maps. This option is enabld by default when + /// modules is enabled. 
+ unsigned ImplicitModuleMaps : 1; + + /// Set the 'home directory' of a module map file to the current + /// working directory (or the home directory of the module map file that + /// contained the 'extern module' directive importing this module map file + /// if any) rather than the directory containing the module map file. + // + /// The home directory is where we look for files named in the module map + /// file. + unsigned ModuleMapFileHomeIsCwd : 1; + + /// The interval (in seconds) between pruning operations. + /// + /// This operation is expensive, because it requires Clang to walk through + /// the directory structure of the module cache, stat()'ing and removing + /// files. + /// + /// The default value is large, e.g., the operation runs once a week. + unsigned ModuleCachePruneInterval = 7 * 24 * 60 * 60; + + /// The time (in seconds) after which an unused module file will be + /// considered unused and will, therefore, be pruned. + /// + /// When the module cache is pruned, any module file that has not been + /// accessed in this many seconds will be removed. The default value is + /// large, e.g., a month, to avoid forcing infrequently-used modules to be + /// regenerated often. + unsigned ModuleCachePruneAfter = 31 * 24 * 60 * 60; + + /// The time in seconds when the build session started. + /// + /// This time is used by other optimizations in header search and module + /// loading. + uint64_t BuildSessionTimestamp = 0; + + /// The set of macro names that should be ignored for the purposes + /// of computing the module hash. + llvm::SmallSetVector ModulesIgnoreMacros; + + /// The set of user-provided virtual filesystem overlay files. + std::vector VFSOverlayFiles; + + /// Include the compiler builtin includes. + unsigned UseBuiltinIncludes : 1; + + /// Include the system standard include search directories. + unsigned UseStandardSystemIncludes : 1; + + /// Include the system standard C++ library include search directories. 
+ unsigned UseStandardCXXIncludes : 1; + + /// Use libc++ instead of the default libstdc++. + unsigned UseLibcxx : 1; + + /// Whether header search information should be output as for -v. + unsigned Verbose : 1; + + /// If true, skip verifying input files used by modules if the + /// module was already verified during this build session (see + /// \c BuildSessionTimestamp). + unsigned ModulesValidateOncePerBuildSession : 1; + + /// Whether to validate system input files when a module is loaded. + unsigned ModulesValidateSystemHeaders : 1; -#define HEADERSEARCHOPT(Name, Bits, Description) unsigned Name : Bits; -#define TYPED_HEADERSEARCHOPT(Type, Name, Description) Type Name; -#include "clang/Lex/HeaderSearchOptions.def" + // Whether the content of input files should be hashed and used to + // validate consistency. + unsigned ValidateASTInputFilesContent : 1; + + /// Whether the module includes debug information (-gmodules). + unsigned UseDebugInfo : 1; + + unsigned ModulesValidateDiagnosticOptions : 1; + + unsigned ModulesHashContent : 1; + + /// Whether we should include all things that could impact the module in the + /// hash. + /// + /// This includes things like the full header search path, and enabled + /// diagnostics. + unsigned ModulesStrictContextHash : 1; HeaderSearchOptions(StringRef _Sysroot = "/") : Sysroot(_Sysroot), ModuleFormat("raw"), DisableModuleHash(false), @@ -112,9 +223,7 @@ class HeaderSearchOptions { ModulesValidateSystemHeaders(false), ValidateASTInputFilesContent(false), UseDebugInfo(false), ModulesValidateDiagnosticOptions(true), ModulesHashContent(false), - ModulesStrictContextHash(false), - ModuleCachePruneInterval(7 * 24 * 60 * 60), - ModuleCachePruneAfter(31 * 24 * 60 * 60), BuildSessionTimestamp(0) {} + ModulesStrictContextHash(false) {} /// AddPath - Add the \p Path path to the specified \p Group list. 
void AddPath(StringRef Path, frontend::IncludeDirGroup Group, diff --git a/clang/include/clang/Lex/PreprocessorOptions.def b/clang/include/clang/Lex/PreprocessorOptions.def deleted file mode 100644 index 5b9e982351a0d..0000000000000 --- a/clang/include/clang/Lex/PreprocessorOptions.def +++ /dev/null @@ -1,166 +0,0 @@ -//===--- PreprocessorOptions.def - Preprocessor option database -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the preprocessor options. Users of this file must -// define the TYPED_PREPROCESSOROPT macro to make use of this information. -// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_PREPROCESSOROPT -#error Define the TYPED_PREPROCESSOROPT macro to handle target options -#endif - -TYPED_PREPROCESSOROPT(MacrosTy, Macros, "") - -TYPED_PREPROCESSOROPT(std::vector, Includes, "") - -TYPED_PREPROCESSOROPT(std::vector, MacroIncludes, "") - -TYPED_PREPROCESSOROPT( - bool, UsePredefines, - "Initialize the preprocessor with the compiler and target " - "specific predefines.") - -TYPED_PREPROCESSOROPT( - bool, DetailedRecord, - "Whether we should maintain a detailed record of all macro " - "definitions and expansions.") - -TYPED_PREPROCESSOROPT( - bool, PCHWithHdrStop, - "When true, we are creating or using a PCH where a #pragma hdrstop is " - "expected to indicate the beginning or end of the PCH.") - -TYPED_PREPROCESSOROPT( - bool, PCHWithHdrStopCreate, - "When true, we are creating a PCH or creating the PCH object " - "while expecting a #pragma hdrstop to separate the two. 
Allow " - "for a missing #pragma hdrstop, which generates a PCH for the " - "whole file, and creates an empty PCH object.") - -TYPED_PREPROCESSOROPT( - std::string, PCHThroughHeader, - "If non-empty, the filename used in an #include directive in the primary " - "source file (or command-line preinclude) that is used to implement " - "MSVC-style precompiled headers. When creating a PCH, after the #include " - "of this header, the PCH generation stops. When using a PCH, tokens are " - "skipped until after an #include of this header is seen.") - -TYPED_PREPROCESSOROPT( - std::string, ImplicitPCHInclude, - "The implicit PCH included at the start of the translation unit, or empty.") - -TYPED_PREPROCESSOROPT( - std::vector, ChainedIncludes, - "Headers that will be converted to chained PCHs in memory.") - -TYPED_PREPROCESSOROPT( - bool, DisablePCHValidation, - "When true, disables most of the normal validation performed " - "on precompiled headers.") - -TYPED_PREPROCESSOROPT( - bool, AllowPCHWithCompilerErrors, - "When true, a PCH with compiler errors will not be rejected.") - -TYPED_PREPROCESSOROPT( - bool, DumpDeserializedPCHDecls, - "Dump declarations that are deserialized from PCH, for testing.") - -TYPED_PREPROCESSOROPT( - std::set, DeserializedPCHDeclsToErrorOn, - "This is a set of names for decls that we do not want to be deserialized, " - "and we emit an error if they are; for testing purposes.") - -TYPED_PREPROCESSOROPT( - PrecompiledPreambleBytesTy, PrecompiledPreambleBytes, - "If non-zero, the implicit PCH include is actually a precompiled preamble " - "that covers this number of bytes in the main source file. The boolean " - "indicates whether the preamble ends at the start of a new line.") - -TYPED_PREPROCESSOROPT( - bool, GeneratePreamble, - "True indicates that a preamble is being generated. 
When the " - "lexer is done, one of the things that need to be preserved is " - "the conditional #if stack, so the ASTWriter/ASTReader can " - "save/restore it when processing the rest of the file.") - -TYPED_PREPROCESSOROPT( - bool, WriteCommentListToPCH, - "Whether to write comment locations into the PCH when building " - "it. Reading the comments from the PCH can be a performance " - "hit even if the clients don't use them.") - -TYPED_PREPROCESSOROPT( - bool, SingleFileParseMode, - "When enabled, preprocessor is in a mode for parsing a single " - "file only. Disables #includes of other files and if there are " - "unresolved identifiers in preprocessor directive conditions " - "it causes all blocks to be parsed so that the client can get " - "the maximum amount of information from the parser.") - -TYPED_PREPROCESSOROPT( - bool, LexEditorPlaceholders, - "When enabled, the preprocessor will construct editor placeholder tokens.") - -TYPED_PREPROCESSOROPT( - bool, RemappedFilesKeepOriginalName, - "True if the SourceManager should report the original file name for " - "contents of files that were remapped to other files. Defaults to true.") - -TYPED_PREPROCESSOROPT( - RemappedFilesTy, RemappedFiles, - "The set of file remappings, which take existing files on the system (the " - "first part of each pair) and gives them the contents of other files on " - "the system (the second part of each pair).") - -TYPED_PREPROCESSOROPT( - RemappedFileBuffersTy, RemappedFileBuffers, - "The set of file-to-buffer remappings, which take existing files on the " - "system (the first part of each pair) and gives them the contents of the " - "specified memory buffer (the second part of each pair).") - -TYPED_PREPROCESSOROPT( - bool, RetainRemappedFileBuffers, - "Whether the compiler instance should retain (i.e., not free) " - "the buffers associated with remapped files. 
This flag " - "defaults to false; it can be set true only through direct " - "manipulation of the compiler invocation object, in cases " - "where the compiler invocation and its buffers will be reused.") - -TYPED_PREPROCESSOROPT( - bool, RetainExcludedConditionalBlocks, - "When enabled, excluded conditional blocks retain in the main file.") - -TYPED_PREPROCESSOROPT( - ObjCXXARCStandardLibraryKind, ObjCXXARCStandardLibrary, - "The Objective-C++ ARC standard library that we should support, by " - "providing appropriate definitions to retrofit the standard library with " - "support for lifetime-qualified pointers.") - -TYPED_PREPROCESSOROPT(std::shared_ptr, FailedModules, "") - -TYPED_PREPROCESSOROPT(MacroPrefixMapTy, MacroPrefixMap, - "A prefix map for __FILE__ and __BASE_FILE__.") - -TYPED_PREPROCESSOROPT( - ExcludedPreprocessorDirectiveSkipMapping *, - ExcludedConditionalDirectiveSkipMappings, - "Contains the currently active skipped range mappings for " - "skipping excluded conditional directives. The pointer is " - "passed to the Preprocessor when it's constructed. The pointer " - "is unowned, the client is responsible for its lifetime.") - -TYPED_PREPROCESSOROPT(bool, SetUpStaticAnalyzer, - "Set up preprocessor for RunAnalysis action.") - -TYPED_PREPROCESSOROPT( - bool, DisablePragmaDebugCrash, - "Prevents intended crashes when using #pragma clang __debug. For testing.") - -#undef TYPED_PREPROCESSOROPT diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index f379d50532287..c551f87e0d7bf 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -44,13 +44,114 @@ enum ObjCXXARCStandardLibraryKind { /// used in preprocessor initialization to InitializePreprocessor(). 
class PreprocessorOptions { public: - using MacrosTy = std::vector>; - using PrecompiledPreambleBytesTy = std::pair; - using RemappedFilesTy = std::vector>; - using RemappedFileBuffersTy = - std::vector>; - using MacroPrefixMapTy = - std::map>; + std::vector> Macros; + std::vector Includes; + std::vector MacroIncludes; + + /// Initialize the preprocessor with the compiler and target specific + /// predefines. + bool UsePredefines = true; + + /// Whether we should maintain a detailed record of all macro + /// definitions and expansions. + bool DetailedRecord = false; + + /// When true, we are creating or using a PCH where a #pragma hdrstop is + /// expected to indicate the beginning or end of the PCH. + bool PCHWithHdrStop = false; + + /// When true, we are creating a PCH or creating the PCH object while + /// expecting a #pragma hdrstop to separate the two. Allow for a + /// missing #pragma hdrstop, which generates a PCH for the whole file, + /// and creates an empty PCH object. + bool PCHWithHdrStopCreate = false; + + /// If non-empty, the filename used in an #include directive in the primary + /// source file (or command-line preinclude) that is used to implement + /// MSVC-style precompiled headers. When creating a PCH, after the #include + /// of this header, the PCH generation stops. When using a PCH, tokens are + /// skipped until after an #include of this header is seen. + std::string PCHThroughHeader; + + /// The implicit PCH included at the start of the translation unit, or empty. + std::string ImplicitPCHInclude; + + /// Headers that will be converted to chained PCHs in memory. + std::vector ChainedIncludes; + + /// When true, disables most of the normal validation performed on + /// precompiled headers. + bool DisablePCHValidation = false; + + /// When true, a PCH with compiler errors will not be rejected. + bool AllowPCHWithCompilerErrors = false; + + /// Dump declarations that are deserialized from PCH, for testing. 
+ bool DumpDeserializedPCHDecls = false; + + /// This is a set of names for decls that we do not want to be + /// deserialized, and we emit an error if they are; for testing purposes. + std::set DeserializedPCHDeclsToErrorOn; + + /// If non-zero, the implicit PCH include is actually a precompiled + /// preamble that covers this number of bytes in the main source file. + /// + /// The boolean indicates whether the preamble ends at the start of a new + /// line. + std::pair PrecompiledPreambleBytes; + + /// True indicates that a preamble is being generated. + /// + /// When the lexer is done, one of the things that need to be preserved is the + /// conditional #if stack, so the ASTWriter/ASTReader can save/restore it when + /// processing the rest of the file. + bool GeneratePreamble = false; + + /// Whether to write comment locations into the PCH when building it. + /// Reading the comments from the PCH can be a performance hit even if the + /// clients don't use them. + bool WriteCommentListToPCH = true; + + /// When enabled, preprocessor is in a mode for parsing a single file only. + /// + /// Disables #includes of other files and if there are unresolved identifiers + /// in preprocessor directive conditions it causes all blocks to be parsed so + /// that the client can get the maximum amount of information from the parser. + bool SingleFileParseMode = false; + + /// When enabled, the preprocessor will construct editor placeholder tokens. + bool LexEditorPlaceholders = true; + + /// True if the SourceManager should report the original file name for + /// contents of files that were remapped to other files. Defaults to true. + bool RemappedFilesKeepOriginalName = true; + + /// The set of file remappings, which take existing files on + /// the system (the first part of each pair) and gives them the + /// contents of other files on the system (the second part of each + /// pair). 
+ std::vector> RemappedFiles; + + /// The set of file-to-buffer remappings, which take existing files + /// on the system (the first part of each pair) and gives them the contents + /// of the specified memory buffer (the second part of each pair). + std::vector> RemappedFileBuffers; + + /// Whether the compiler instance should retain (i.e., not free) + /// the buffers associated with remapped files. + /// + /// This flag defaults to false; it can be set true only through direct + /// manipulation of the compiler invocation object, in cases where the + /// compiler invocation and its buffers will be reused. + bool RetainRemappedFileBuffers = false; + + /// When enabled, excluded conditional blocks retain in the main file. + bool RetainExcludedConditionalBlocks = false; + + /// The Objective-C++ ARC standard library that we should support, + /// by providing appropriate definitions to retrofit the standard library + /// with support for lifetime-qualified pointers. + ObjCXXARCStandardLibraryKind ObjCXXARCStandardLibrary = ARCXX_nolib; /// Records the set of modules class FailedModulesSet { @@ -66,21 +167,33 @@ class PreprocessorOptions { } }; -#define TYPED_PREPROCESSOROPT(Type, Name, Description) Type Name; -#include "clang/Lex/PreprocessorOptions.def" - - PreprocessorOptions() - : UsePredefines(true), DetailedRecord(false), PCHWithHdrStop(false), - PCHWithHdrStopCreate(false), DisablePCHValidation(false), - AllowPCHWithCompilerErrors(false), DumpDeserializedPCHDecls(false), - PrecompiledPreambleBytes(0, false), GeneratePreamble(false), - WriteCommentListToPCH(true), SingleFileParseMode(false), - LexEditorPlaceholders(true), RemappedFilesKeepOriginalName(true), - RetainRemappedFileBuffers(false), - RetainExcludedConditionalBlocks(false), - ObjCXXARCStandardLibrary(ARCXX_nolib), - ExcludedConditionalDirectiveSkipMappings(nullptr), - SetUpStaticAnalyzer(false), DisablePragmaDebugCrash(false) {} + /// The set of modules that failed to build. 
+ /// + /// This pointer will be shared among all of the compiler instances created + /// to (re)build modules, so that once a module fails to build anywhere, + /// other instances will see that the module has failed and won't try to + /// build it again. + std::shared_ptr FailedModules; + + /// A prefix map for __FILE__ and __BASE_FILE__. + std::map> MacroPrefixMap; + + /// Contains the currently active skipped range mappings for skipping excluded + /// conditional directives. + /// + /// The pointer is passed to the Preprocessor when it's constructed. The + /// pointer is unowned, the client is responsible for its lifetime. + ExcludedPreprocessorDirectiveSkipMapping + *ExcludedConditionalDirectiveSkipMappings = nullptr; + + /// Set up preprocessor for RunAnalysis action. + bool SetUpStaticAnalyzer = false; + + /// Prevents intended crashes when using #pragma clang __debug. For testing. + bool DisablePragmaDebugCrash = false; + +public: + PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {} void addMacroDef(StringRef Name) { Macros.emplace_back(std::string(Name), false); diff --git a/clang/include/clang/Sema/CodeCompleteOptions.def b/clang/include/clang/Sema/CodeCompleteOptions.def deleted file mode 100644 index dab8027929e5e..0000000000000 --- a/clang/include/clang/Sema/CodeCompleteOptions.def +++ /dev/null @@ -1,51 +0,0 @@ -//===--- CodeCompleteOptions.def - FileSystem option database ----*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the CodeComplete options. 
-// -//===----------------------------------------------------------------------===// - -#ifndef TYPED_CODE_COMPLETEOPT -#define TYPED_CODE_COMPLETEOPT(Type, Name, Description) -#endif - -#ifndef CODE_COMPLETEOPT -#define CODE_COMPLETEOPT(Name, Bits, Description) \ - TYPED_CODE_COMPLETEOPT(unsigned, Name, Description); -#endif - -CODE_COMPLETEOPT(IncludeMacros, 1, "Show macros in code completion results.") - -CODE_COMPLETEOPT(IncludeCodePatterns, 1, - "Show code patterns in code completion results.") - -CODE_COMPLETEOPT(IncludeGlobals, 1, - "Show top-level decls in code completion results.") - -CODE_COMPLETEOPT(IncludeNamespaceLevelDecls, 1, - "Show decls in namespace (including the global namespace) in " - "code completion results. If this is 0, `IncludeGlobals` will " - "be ignored. Currently, this only works when completing " - "qualified IDs (i.e. `Sema::CodeCompleteQualifiedId`). FIXME: " - "consider supporting more completion cases with this option.") - -CODE_COMPLETEOPT( - IncludeBriefComments, 1, - "Show brief documentation comments in code completion results.") - -CODE_COMPLETEOPT(LoadExternal, 1, - "Hint whether to load data from the external AST to provide " - "full results. If false, namespace-level declarations and " - "macros from the preamble may be omitted.") - -CODE_COMPLETEOPT(IncludeFixIts, 1, - "Include results after corrections (small fix-its), e.g. " - "change '.' to '->' on member access, etc.") - -#undef TYPED_CODE_COMPLETEOPT -#undef CODE_COMPLETEOPT \ No newline at end of file diff --git a/clang/include/clang/Sema/CodeCompleteOptions.h b/clang/include/clang/Sema/CodeCompleteOptions.h index 28cbc94fc84c2..a3403b01dcde9 100644 --- a/clang/include/clang/Sema/CodeCompleteOptions.h +++ b/clang/include/clang/Sema/CodeCompleteOptions.h @@ -14,14 +14,39 @@ namespace clang { /// Options controlling the behavior of code completion. 
class CodeCompleteOptions { public: -#define CODE_COMPLETEOPT(Name, Bits, Description) unsigned Name : Bits; -#define TYPED_CODE_COMPLETEOPT(Type, Name, Description) Type Name; -#include "clang/Sema/CodeCompleteOptions.def" + /// Show macros in code completion results. + unsigned IncludeMacros : 1; + + /// Show code patterns in code completion results. + unsigned IncludeCodePatterns : 1; + + /// Show top-level decls in code completion results. + unsigned IncludeGlobals : 1; + + /// Show decls in namespace (including the global namespace) in code + /// completion results. If this is 0, `IncludeGlobals` will be ignored. + /// + /// Currently, this only works when completing qualified IDs (i.e. + /// `Sema::CodeCompleteQualifiedId`). + /// FIXME: consider supporting more completion cases with this option. + unsigned IncludeNamespaceLevelDecls : 1; + + /// Show brief documentation comments in code completion results. + unsigned IncludeBriefComments : 1; + + /// Hint whether to load data from the external AST to provide full results. + /// If false, namespace-level declarations and macros from the preamble may be + /// omitted. + unsigned LoadExternal : 1; + + /// Include results after corrections (small fix-its), e.g. change '.' to '->' + /// on member access, etc. 
+ unsigned IncludeFixIts : 1; CodeCompleteOptions() : IncludeMacros(0), IncludeCodePatterns(0), IncludeGlobals(1), - IncludeNamespaceLevelDecls(1), IncludeBriefComments(0), LoadExternal(1), - IncludeFixIts(0) {} + IncludeNamespaceLevelDecls(1), IncludeBriefComments(0), + LoadExternal(1), IncludeFixIts(0) {} }; } // namespace clang diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index ff253ca15c0ea..f0359d2dbb3c2 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -6,10 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines the analyzer options avaible with -analyzer-config, using -// the ANLAYZER_OPTION and ANALYZER_OPTION_DEPENDS_ON_USER_MODE macros. -// Other analyzer options use the simpler ANALYZEROPT and TYPED_ANALYZEROPT -// macro. +// This file defines the analyzer options avaible with -analyzer-config. // //===----------------------------------------------------------------------===// @@ -32,15 +29,6 @@ define both 'ANALYZER_OPTION' and 'ANALYZER_OPTION_DEPENDS_ON_USER_MODE' macros! #endif #endif -#ifndef TYPED_ANALYZEROPT -#define TYPED_ANALYZEROPT(TYPE, NAME, DESCRIPTION) -#endif - -#ifndef ANALYZEROPT -#define ANALYZEROPT(NAME, BITS, DESCRIPTION) \ - TYPED_ANALYZEROPT(unsigned, NAME, DESCRITPTION) -#endif - #ifndef ANALYZER_OPTION /// Create a new analyzer option, but dont generate a method for it in /// AnalyzerOptions. @@ -54,8 +42,7 @@ define both 'ANALYZER_OPTION' and 'ANALYZER_OPTION_DEPENDS_ON_USER_MODE' macros! /// (-analyzer-config CMDFLAG=VALUE) /// DESC - Description of the flag. /// DEFAULT_VAL - The default value for CMDFLAG. 
-#define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL) \ - TYPED_ANALYZEROPT(TYPE, NAME, DESC) +#define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL) #endif #ifndef ANALYZER_OPTION_DEPENDS_ON_USER_MODE @@ -75,8 +62,7 @@ define both 'ANALYZER_OPTION' and 'ANALYZER_OPTION_DEPENDS_ON_USER_MODE' macros! /// DEEP_VAL - The default value for CMDFLAG, when "user-mode" was set to /// "deep". #define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC, \ - SHALLOW_VAL, DEEP_VAL) \ - TYPED_ANALYZEROPT(TYPE, NAME, DESC) + SHALLOW_VAL, DEEP_VAL) #endif //===----------------------------------------------------------------------===// @@ -449,79 +435,5 @@ ANALYZER_OPTION_DEPENDS_ON_USER_MODE( "\"basic-inlining\", \"inlining\", \"dynamic\", \"dynamic-bifurcate\".", /* SHALLOW_VAL */ "inlining", /* DEEP_VAL */ "dynamic-bifurcate") -//===----------------------------------------------------------------------===// -// Other analyzer options. -//===----------------------------------------------------------------------===// - -TYPED_ANALYZEROPT(CheckersAndPackagesTy, CheckersAndPackages, - "Pairs of checker/package name and enable/disable.") - -TYPED_ANALYZEROPT( - std::vector, SilencedCheckersAndPackages, - "Vector of checker/package names which will not emit warnings.") - -TYPED_ANALYZEROPT(ConfigTable, Config, - "A key-value table of use-specified configuration values.") -TYPED_ANALYZEROPT(AnalysisStores, AnalysisStoreOpt, "") -TYPED_ANALYZEROPT(AnalysisConstraints, AnalysisConstraintsOpt, "") -TYPED_ANALYZEROPT(AnalysisDiagClients, AnalysisDiagOpt, "") -TYPED_ANALYZEROPT(AnalysisPurgeMode, AnalysisPurgeOpt, "") - -TYPED_ANALYZEROPT(std::string, AnalyzeSpecificFunction, "") - -TYPED_ANALYZEROPT(std::string, DumpExplodedGraphTo, - "File path to which the exploded graph should be dumped.") - -TYPED_ANALYZEROPT(std::string, FullCompilerInvocation, - "Store full compiler invocation for reproducible " - "instructions in the generated report.") - 
-TYPED_ANALYZEROPT(unsigned, maxBlockVisitOnPath, - "The maximum number of times the analyzer visits a block.") - -ANALYZEROPT( - DisableAllCheckers, 1, - "Disable all analyzer checkers. This flag allows one to disable analyzer " - "checkers on the code processed by the given analysis consumer. Note, the " - "code will get parsed and the command-line options will get checked.") - -ANALYZEROPT(ShowCheckerHelp, 1, "") -ANALYZEROPT(ShowCheckerHelpAlpha, 1, "") -ANALYZEROPT(ShowCheckerHelpDeveloper, 1, "") - -ANALYZEROPT(ShowCheckerOptionList, 1, "") -ANALYZEROPT(ShowCheckerOptionAlphaList, 1, "") -ANALYZEROPT(ShowCheckerOptionDeveloperList, 1, "") - -ANALYZEROPT(ShowEnabledCheckerList, 1, "") -ANALYZEROPT(ShowConfigOptionsList, 1, "") -ANALYZEROPT(ShouldEmitErrorsOnInvalidConfigValue, 1, "") -ANALYZEROPT(AnalyzeAll, 1, "") -ANALYZEROPT(AnalyzerDisplayProgress, 1, "") -ANALYZEROPT(AnalyzeNestedBlocks, 1, "") - -ANALYZEROPT(eagerlyAssumeBinOpBifurcation, 1, "") - -ANALYZEROPT(TrimGraph, 1, "") -ANALYZEROPT(visualizeExplodedGraphWithGraphViz, 1, "") -ANALYZEROPT(UnoptimizedCFG, 1, "") -ANALYZEROPT(PrintStats, 1, "") - -ANALYZEROPT( - NoRetryExhausted, 1, - "Do not re-analyze paths leading to exhausted nodes with a different " - "strategy. We get better code coverage when retry is enabled.") - -ANALYZEROPT(AnalyzerWerror, 1, "Emit analyzer warnings as errors.") - -TYPED_ANALYZEROPT(unsigned, InlineMaxStackDepth, - "The inlining stack depth limit. 
Cap the stack depth at 4 " - "calls (5 stack frames, base + 4 calls).") - -TYPED_ANALYZEROPT(AnalysisInliningMode, InliningMode, - "The mode of function selection used during inlining.") - #undef ANALYZER_OPTION_DEPENDS_ON_USER_MODE #undef ANALYZER_OPTION -#undef TYPED_ANALYZEROPT -#undef ANALYZEROPT diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h index 8d81f90294174..4907b0757a8a4 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h @@ -162,7 +162,6 @@ enum UserModeKind { class AnalyzerOptions : public RefCountedBase { public: using ConfigTable = llvm::StringMap; - using CheckersAndPackagesTy = std::vector>; /// Retrieves the list of checkers generated from Checkers.td. This doesn't /// contain statically linked but non-generated checkers and plugin checkers! @@ -196,9 +195,86 @@ class AnalyzerOptions : public RefCountedBase { size_t InitialPad, size_t EntryWidth, size_t MinLineWidth = 0); -#define ANALYZEROPT(NAME, BITS, DESCRIPTION) unsigned NAME : BITS; -#define TYPED_ANALYZEROPT(TYPE, NAME, DESCRIPTION) TYPE NAME; + /// Pairs of checker/package name and enable/disable. + std::vector> CheckersAndPackages; + + /// Vector of checker/package names which will not emit warnings. + std::vector SilencedCheckersAndPackages; + + /// A key-value table of use-specified configuration values. + // TODO: This shouldn't be public. + ConfigTable Config; + AnalysisStores AnalysisStoreOpt = RegionStoreModel; + AnalysisConstraints AnalysisConstraintsOpt = RangeConstraintsModel; + AnalysisDiagClients AnalysisDiagOpt = PD_HTML; + AnalysisPurgeMode AnalysisPurgeOpt = PurgeStmt; + + std::string AnalyzeSpecificFunction; + + /// File path to which the exploded graph should be dumped. + std::string DumpExplodedGraphTo; + + /// Store full compiler invocation for reproducible instructions in the + /// generated report. 
+ std::string FullCompilerInvocation; + + /// The maximum number of times the analyzer visits a block. + unsigned maxBlockVisitOnPath; + + /// Disable all analyzer checkers. + /// + /// This flag allows one to disable analyzer checkers on the code processed by + /// the given analysis consumer. Note, the code will get parsed and the + /// command-line options will get checked. + unsigned DisableAllCheckers : 1; + + unsigned ShowCheckerHelp : 1; + unsigned ShowCheckerHelpAlpha : 1; + unsigned ShowCheckerHelpDeveloper : 1; + + unsigned ShowCheckerOptionList : 1; + unsigned ShowCheckerOptionAlphaList : 1; + unsigned ShowCheckerOptionDeveloperList : 1; + + unsigned ShowEnabledCheckerList : 1; + unsigned ShowConfigOptionsList : 1; + unsigned ShouldEmitErrorsOnInvalidConfigValue : 1; + unsigned AnalyzeAll : 1; + unsigned AnalyzerDisplayProgress : 1; + unsigned AnalyzeNestedBlocks : 1; + + unsigned eagerlyAssumeBinOpBifurcation : 1; + + unsigned TrimGraph : 1; + unsigned visualizeExplodedGraphWithGraphViz : 1; + unsigned UnoptimizedCFG : 1; + unsigned PrintStats : 1; + + /// Do not re-analyze paths leading to exhausted nodes with a different + /// strategy. We get better code coverage when retry is enabled. + unsigned NoRetryExhausted : 1; + + /// Emit analyzer warnings as errors. + unsigned AnalyzerWerror : 1; + + /// The inlining stack depth limit. + // Cap the stack depth at 4 calls (5 stack frames, base + 4 calls). + unsigned InlineMaxStackDepth = 5; + + /// The mode of function selection used during inlining. + AnalysisInliningMode InliningMode = NoRedundancy; + + // Create a field for each -analyzer-config option. 
+#define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC, \ + SHALLOW_VAL, DEEP_VAL) \ + ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, SHALLOW_VAL) + +#define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL) \ + TYPE NAME; + #include "clang/StaticAnalyzer/Core/AnalyzerOptions.def" +#undef ANALYZER_OPTION +#undef ANALYZER_OPTION_DEPENDS_ON_USER_MODE // Create an array of all -analyzer-config command line options. Sort it in // the constructor. @@ -223,19 +299,15 @@ class AnalyzerOptions : public RefCountedBase { } AnalyzerOptions() - : AnalysisStoreOpt(RegionStoreModel), - AnalysisConstraintsOpt(RangeConstraintsModel), AnalysisDiagOpt(PD_HTML), - AnalysisPurgeOpt(PurgeStmt), DisableAllCheckers(false), - ShowCheckerHelp(false), ShowCheckerHelpAlpha(false), - ShowCheckerHelpDeveloper(false), ShowCheckerOptionList(false), - ShowCheckerOptionAlphaList(false), + : DisableAllCheckers(false), ShowCheckerHelp(false), + ShowCheckerHelpAlpha(false), ShowCheckerHelpDeveloper(false), + ShowCheckerOptionList(false), ShowCheckerOptionAlphaList(false), ShowCheckerOptionDeveloperList(false), ShowEnabledCheckerList(false), ShowConfigOptionsList(false), AnalyzeAll(false), AnalyzerDisplayProgress(false), AnalyzeNestedBlocks(false), eagerlyAssumeBinOpBifurcation(false), TrimGraph(false), visualizeExplodedGraphWithGraphViz(false), UnoptimizedCFG(false), - PrintStats(false), NoRetryExhausted(false), AnalyzerWerror(false), - InlineMaxStackDepth(5), InliningMode(NoRedundancy) { + PrintStats(false), NoRetryExhausted(false), AnalyzerWerror(false) { llvm::sort(AnalyzerConfigCmdFlags); } diff --git a/clang/lib/Basic/CodeGenOptions.cpp b/clang/lib/Basic/CodeGenOptions.cpp index 9e04b5ced2bb3..4fc7a535c9eb9 100644 --- a/clang/lib/Basic/CodeGenOptions.cpp +++ b/clang/lib/Basic/CodeGenOptions.cpp @@ -10,9 +10,8 @@ #include namespace clang { -CodeGenOptions::CodeGenOptions() - : FPDenormalMode(llvm::DenormalMode::getIEEE()), - FP32DenormalMode(llvm::DenormalMode::getIEEE()), 
Argv0(nullptr) { + +CodeGenOptions::CodeGenOptions() { #define CODEGENOPT(Name, Bits, Default) Name = Default; #define ENUM_CODEGENOPT(Name, Type, Bits, Default) set##Name(Default); #include "clang/Basic/CodeGenOptions.def" diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index 344d326a92e48..c08670c87fb69 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -14,8 +14,7 @@ using namespace clang; -LangOptions::LangOptions() - : CFRuntime(CoreFoundationABI::Unspecified), IsHeaderFile(false) { +LangOptions::LangOptions() { #define LANGOPT(Name, Bits, Default, Description) Name = Default; #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) set##Name(Default); #include "clang/Basic/LangOptions.def" From 4cefa8614ffe18cf8de0fe86297df81f4385e08f Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Wed, 2 Sep 2020 11:20:47 -0700 Subject: [PATCH 027/465] [TSan][libdispatch] Fix compilation error on Linux The interceptor for the block variants of the API references the function versions (via `REAL(name##_f)`). On Linux, this accesses the underlying "real pointer", defined by the interceptor macro. 
So we need to declare interceptors in the right order to avoid undefined symbol compiler error: ``` error: no member named 'real_dispatch_async_and_wait_f' in namespace '__tsan::__interception' ``` rdar://68181542 --- compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp index b56cc2dab7044..cbbb7ecb2397e 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp @@ -240,10 +240,10 @@ SANITIZER_WEAK_IMPORT void dispatch_barrier_async_and_wait( SANITIZER_WEAK_IMPORT void dispatch_barrier_async_and_wait_f( dispatch_queue_t queue, void *context, dispatch_function_t work); -DISPATCH_INTERCEPT_SYNC_B(dispatch_async_and_wait, false) DISPATCH_INTERCEPT_SYNC_F(dispatch_async_and_wait_f, false) -DISPATCH_INTERCEPT_SYNC_B(dispatch_barrier_async_and_wait, true) +DISPATCH_INTERCEPT_SYNC_B(dispatch_async_and_wait, false) DISPATCH_INTERCEPT_SYNC_F(dispatch_barrier_async_and_wait_f, true) +DISPATCH_INTERCEPT_SYNC_B(dispatch_barrier_async_and_wait, true) #endif From 1426ac0482951d4cb98f84ab9fc89c745f95602e Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 2 Sep 2020 14:28:42 -0400 Subject: [PATCH 028/465] [gn build] Fix COMPILER_RT_HAS_* defines for libclang_rt.profile The cmake build uses COMPILER_RT_TARGET_HAS_* in the CMakeLists.txt but then translates it to -DCOMPILER_RT_HAS_* flags which the c++ code checks for. So we need to define the latter, not the former. 
--- llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn index 77e2b6d218f76..5fab007153e49 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn @@ -64,9 +64,9 @@ static_library("profile") { if (current_os != "win") { defines = [ - "COMPILER_RT_TARGET_HAS_ATOMICS", - "COMPILER_RT_TARGET_HAS_FCNTL_LCK", - "COMPILER_RT_TARGET_HAS_UNAME", + "COMPILER_RT_HAS_UNAME", + "COMPILER_RT_HAS_ATOMICS", + "COMPILER_RT_HAS_FCNTL_LCK", ] } } From e0e7eb2e2648aee83caf2ecfe2972ce2f653d306 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Wed, 2 Sep 2020 18:01:18 +0200 Subject: [PATCH 029/465] [clang] Add missing .def files to Clang's modulemap These new .def files weren't marked as textual so they ended up being compiled into the Clang module (which completely defeats the purpose of .def files). 
--- clang/include/clang/module.modulemap | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap index 13d4dbf9dc2e8..6290548b41f1c 100644 --- a/clang/include/clang/module.modulemap +++ b/clang/include/clang/module.modulemap @@ -52,8 +52,10 @@ module Clang_Basic { textual header "Basic/BuiltinsX86_64.def" textual header "Basic/BuiltinsXCore.def" textual header "Basic/CodeGenOptions.def" + textual header "Basic/CommentOptions.def" textual header "Basic/DiagnosticOptions.def" textual header "Basic/Features.def" + textual header "Basic/FileSystemOptions.def" textual header "Basic/FPOptions.def" textual header "Basic/MSP430Target.def" textual header "Basic/LangOptions.def" @@ -63,6 +65,7 @@ module Clang_Basic { textual header "Basic/OpenMPKinds.def" textual header "Basic/OperatorKinds.def" textual header "Basic/Sanitizers.def" + textual header "Basic/TargetOptions.def" textual header "Basic/TokenKinds.def" textual header "Basic/X86Target.def" @@ -107,17 +110,35 @@ module Clang_Frontend { umbrella "Frontend" textual header "Basic/LangStandards.def" + textual header "Frontend/DependencyOutputOptions.def" + textual header "Frontend/FrontendOptions.def" + textual header "Frontend/MigratorOptions.def" + textual header "Frontend/PreprocessorOutputOptions.def" module * { export * } } module Clang_FrontendTool { requires cplusplus umbrella "FrontendTool" module * { export * } } module Clang_Index { requires cplusplus umbrella "Index" module * { export * } } -module Clang_Lex { requires cplusplus umbrella "Lex" module * { export * } } +module Clang_Lex { + requires cplusplus + umbrella "Lex" + textual header "Lex/HeaderSearchOptions.def" + textual header "Lex/PreprocessorOptions.def" + + module * { export * } +} module Clang_Parse { requires cplusplus umbrella "Parse" module * { export * } } module Clang_Rewrite { requires cplusplus umbrella "Rewrite/Core" module * { 
export * } } module Clang_RewriteFrontend { requires cplusplus umbrella "Rewrite/Frontend" module * { export * } } -module Clang_Sema { requires cplusplus umbrella "Sema" module * { export * } } +module Clang_Sema { + requires cplusplus + umbrella "Sema" + + textual header "Sema/CodeCompleteOptions.def" + + module * { export * } +} module Clang_Serialization { requires cplusplus From 9523cf02c22a83bece8d81080693a0cbf4098bb5 Mon Sep 17 00:00:00 2001 From: Erik Pilkington Date: Wed, 2 Sep 2020 12:23:17 -0400 Subject: [PATCH 030/465] [AST] Fix handling of long double and bool in __builtin_bit_cast On x86, long double has 6 unused trailing bytes. This patch changes the constant evaluator to treat them as though they were padding bytes, so reading from them results in an indeterminate value, and nothing is written for them. Also, fix a similar bug with bool, but instead of treating the unused bits as padding, enforce that they're zero. Differential revision: https://reviews.llvm.org/D76323 --- .../include/clang/Basic/DiagnosticASTKinds.td | 2 + clang/lib/AST/ExprConstant.cpp | 42 +++++++++++- .../SemaCXX/constexpr-builtin-bit-cast.cpp | 66 +++++++++++++++++++ 3 files changed, 107 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index 9be75f3751198..6a9ff309e49cb 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -298,6 +298,8 @@ def note_constexpr_bit_cast_invalid_subtype : Note< def note_constexpr_bit_cast_indet_dest : Note< "indeterminate value can only initialize an object of type 'unsigned char'" "%select{, 'char',|}1 or 'std::byte'; %0 is invalid">; +def note_constexpr_bit_cast_unrepresentable_value : Note< + "value %1 cannot be represented in type %0">; def note_constexpr_pseudo_destructor : Note< "pseudo-destructor call is not permitted in constant expressions " "until C++20">; diff --git 
a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 014c48e6f08f0..e8f132dd48032 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -6627,9 +6627,15 @@ class APValueToBufferConverter { } bool visitInt(const APSInt &Val, QualType Ty, CharUnits Offset) { - CharUnits Width = Info.Ctx.getTypeSizeInChars(Ty); - SmallVector Bytes(Width.getQuantity()); - llvm::StoreIntToMemory(Val, &*Bytes.begin(), Width.getQuantity()); + APSInt AdjustedVal = Val; + unsigned Width = AdjustedVal.getBitWidth(); + if (Ty->isBooleanType()) { + Width = Info.Ctx.getTypeSize(Ty); + AdjustedVal = AdjustedVal.extend(Width); + } + + SmallVector Bytes(Width / 8); + llvm::StoreIntToMemory(AdjustedVal, &*Bytes.begin(), Width / 8); Buffer.writeObject(Offset, Bytes); return true; } @@ -6670,6 +6676,13 @@ class BufferToAPValueConverter { return None; } + llvm::NoneType unrepresentableValue(QualType Ty, const APSInt &Val) { + Info.FFDiag(BCE->getBeginLoc(), + diag::note_constexpr_bit_cast_unrepresentable_value) + << Ty << Val.toString(/*Radix=*/10); + return None; + } + Optional visit(const BuiltinType *T, CharUnits Offset, const EnumType *EnumSugar = nullptr) { if (T->isNullPtrType()) { @@ -6680,6 +6693,20 @@ class BufferToAPValueConverter { } CharUnits SizeOf = Info.Ctx.getTypeSizeInChars(T); + + // Work around floating point types that contain unused padding bytes. This + // is really just `long double` on x86, which is the only fundamental type + // with padding bytes. 
+ if (T->isRealFloatingType()) { + const llvm::fltSemantics &Semantics = + Info.Ctx.getFloatTypeSemantics(QualType(T, 0)); + unsigned NumBits = llvm::APFloatBase::getSizeInBits(Semantics); + assert(NumBits % 8 == 0); + CharUnits NumBytes = CharUnits::fromQuantity(NumBits / 8); + if (NumBytes != SizeOf) + SizeOf = NumBytes; + } + SmallVector Bytes; if (!Buffer.readObject(Offset, SizeOf, Bytes)) { // If this is std::byte or unsigned char, then its okay to store an @@ -6704,6 +6731,15 @@ class BufferToAPValueConverter { if (T->isIntegralOrEnumerationType()) { Val.setIsSigned(T->isSignedIntegerOrEnumerationType()); + + unsigned IntWidth = Info.Ctx.getIntWidth(QualType(T, 0)); + if (IntWidth != Val.getBitWidth()) { + APSInt Truncated = Val.trunc(IntWidth); + if (Truncated.extend(Val.getBitWidth()) != Val) + return unrepresentableValue(QualType(T, 0), Val); + Val = Truncated; + } + return APValue(Val); } diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp index 06771f8f3252a..5b5d1cb7bc807 100644 --- a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp +++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp @@ -23,6 +23,10 @@ static_assert(sizeof(long long) == 8); template constexpr To bit_cast(const From &from) { static_assert(sizeof(To) == sizeof(From)); + // expected-note@+9 {{cannot be represented in type 'bool'}} +#ifdef __x86_64 + // expected-note@+7 {{or 'std::byte'; '__int128' is invalid}} +#endif #ifdef __CHAR_UNSIGNED__ // expected-note@+4 2 {{indeterminate value can only initialize an object of type 'unsigned char', 'char', or 'std::byte'; 'signed char' is invalid}} #else @@ -397,3 +401,65 @@ union IdentityInUnion { }; constexpr IdentityInUnion identity3a = {42}; constexpr unsigned char identity3b = __builtin_bit_cast(unsigned char, identity3a.n); + +namespace test_bool { + +constexpr bool test_bad_bool = bit_cast('A'); // expected-error {{must be initialized by a constant expression}} 
expected-note{{in call}} + +static_assert(round_trip(true), ""); +static_assert(round_trip(false), ""); +static_assert(round_trip(false), ""); + +static_assert(round_trip((char)0), ""); +static_assert(round_trip((char)1), ""); +} + +namespace test_long_double { +#ifdef __x86_64 +constexpr __int128_t test_cast_to_int128 = bit_cast<__int128_t>((long double)0); // expected-error{{must be initialized by a constant expression}} expected-note{{in call}} + +constexpr long double ld = 3.1425926539; + +struct bytes { + unsigned char d[16]; +}; + +static_assert(round_trip(ld), ""); + +static_assert(round_trip(10.0L)); + +constexpr bool f(bool read_uninit) { + bytes b = bit_cast(ld); + unsigned char ld_bytes[10] = { + 0x0, 0x48, 0x9f, 0x49, 0xf0, + 0x3c, 0x20, 0xc9, 0x0, 0x40, + }; + + for (int i = 0; i != 10; ++i) + if (ld_bytes[i] != b.d[i]) + return false; + + if (read_uninit && b.d[10]) // expected-note{{read of uninitialized object is not allowed in a constant expression}} + return false; + + return true; +} + +static_assert(f(/*read_uninit=*/false), ""); +static_assert(f(/*read_uninit=*/true), ""); // expected-error{{static_assert expression is not an integral constant expression}} expected-note{{in call to 'f(true)'}} + +constexpr bytes ld539 = { + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0xc0, 0x86, + 0x8, 0x40, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +constexpr long double fivehundredandthirtynine = 539.0; + +static_assert(bit_cast(ld539) == fivehundredandthirtynine, ""); + +#else +static_assert(round_trip<__int128_t>(34.0L)); +#endif +} From 2d11ae0a40e209a7b91aeff0c9cf28fe41dce93c Mon Sep 17 00:00:00 2001 From: Erik Pilkington Date: Wed, 2 Sep 2020 13:30:27 -0400 Subject: [PATCH 031/465] Fix a -Wparenthesis warning in 8ff44e644bb7, NFC --- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 1f362e2b6b318..3ecc8743265c0 100644 --- 
a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1989,7 +1989,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, } void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { - assert(isa(GV) || !GV->isDeclaration() && + assert((isa(GV) || !GV->isDeclaration()) && "Only globals with definition can force usage."); LLVMUsed.emplace_back(GV); } From 01700c45eb22d848dd1dd980d7d46ae9aa034ade Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 2 Sep 2020 19:10:36 +0000 Subject: [PATCH 032/465] Store an Identifier instead of a StringRef for the OperationName inside an AbstractOperation (NFC) Instead of storing a StringRef, we keep an Identifier which otherwise requires a lock on the context to retrieve. This will allow to get an Identifier for any registered Operation for "free". Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D86994 --- mlir/include/mlir/IR/Identifier.h | 3 +++ mlir/include/mlir/IR/OperationSupport.h | 15 ++++++--------- mlir/lib/IR/MLIRContext.cpp | 19 +++++++++++++++++++ mlir/lib/IR/Operation.cpp | 9 +++++++-- mlir/lib/Parser/Parser.cpp | 4 ++-- 5 files changed, 37 insertions(+), 13 deletions(-) diff --git a/mlir/include/mlir/IR/Identifier.h b/mlir/include/mlir/IR/Identifier.h index ca1946bd8ad02..353dbc9020875 100644 --- a/mlir/include/mlir/IR/Identifier.h +++ b/mlir/include/mlir/IR/Identifier.h @@ -67,6 +67,9 @@ class Identifier { return Identifier(static_cast(entry)); } + /// Compare the underlying StringRef. + int compare(Identifier rhs) const { return strref().compare(rhs.strref()); } + private: /// This contains the bytes of the string, which is guaranteed to be nul /// terminated. 
diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index b0e1205eefe6f..7fce4b808d2e4 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -82,7 +82,7 @@ class AbstractOperation { using OperationProperties = uint32_t; /// This is the name of the operation. - const StringRef name; + const Identifier name; /// This is the dialect that this operation belongs to. Dialect &dialect; @@ -171,13 +171,7 @@ class AbstractOperation { SmallVectorImpl &results), void (&getCanonicalizationPatterns)(OwningRewritePatternList &results, MLIRContext *context), - detail::InterfaceMap &&interfaceMap, bool (&hasTrait)(TypeID traitID)) - : name(name), dialect(dialect), typeID(typeID), - parseAssembly(parseAssembly), printAssembly(printAssembly), - verifyInvariants(verifyInvariants), foldHook(foldHook), - getCanonicalizationPatterns(getCanonicalizationPatterns), - opProperties(opProperties), interfaceMap(std::move(interfaceMap)), - hasRawTrait(hasTrait) {} + detail::InterfaceMap &&interfaceMap, bool (&hasTrait)(TypeID traitID)); /// The properties of the operation. const OperationProperties opProperties; @@ -302,9 +296,12 @@ class OperationName { /// Return the operation name with dialect name stripped, if it has one. StringRef stripDialect() const; - /// Return the name of this operation. This always succeeds. + /// Return the name of this operation. This always succeeds. StringRef getStringRef() const; + /// Return the name of this operation as an identifier. This always succeeds. + Identifier getIdentifier() const; + /// If this operation has a registered operation description, return it. /// Otherwise return null. 
const AbstractOperation *getAbstractOperation() const; diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 67658a9ca33a1..a6246024a5aed 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -668,6 +668,25 @@ const AbstractOperation *AbstractOperation::lookup(StringRef opName, return nullptr; } +AbstractOperation::AbstractOperation( + StringRef name, Dialect &dialect, OperationProperties opProperties, + TypeID typeID, + ParseResult (&parseAssembly)(OpAsmParser &parser, OperationState &result), + void (&printAssembly)(Operation *op, OpAsmPrinter &p), + LogicalResult (&verifyInvariants)(Operation *op), + LogicalResult (&foldHook)(Operation *op, ArrayRef operands, + SmallVectorImpl &results), + void (&getCanonicalizationPatterns)(OwningRewritePatternList &results, + MLIRContext *context), + detail::InterfaceMap &&interfaceMap, bool (&hasTrait)(TypeID traitID)) + : name(Identifier::get(name, dialect.getContext())), dialect(dialect), + typeID(typeID), parseAssembly(parseAssembly), + printAssembly(printAssembly), verifyInvariants(verifyInvariants), + foldHook(foldHook), + getCanonicalizationPatterns(getCanonicalizationPatterns), + opProperties(opProperties), interfaceMap(std::move(interfaceMap)), + hasRawTrait(hasTrait) {} + /// Get the dialect that registered the type with the provided typeid. const AbstractType &AbstractType::lookup(TypeID typeID, MLIRContext *context) { auto &impl = context->getImpl(); diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index 67249b83b1047..b8f9e6c9fdfc4 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -45,11 +45,16 @@ StringRef OperationName::stripDialect() const { return splitName.second.empty() ? splitName.first : splitName.second; } -/// Return the name of this operation. This always succeeds. +/// Return the name of this operation. This always succeeds. 
StringRef OperationName::getStringRef() const { + return getIdentifier().strref(); +} + +/// Return the name of this operation as an identifier. This always succeeds. +Identifier OperationName::getIdentifier() const { if (auto *op = representation.dyn_cast()) return op->name; - return representation.get().strref(); + return representation.get(); } const AbstractOperation *OperationName::getAbstractOperation() const { diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp index d6065f758fc13..48651a98561cf 100644 --- a/mlir/lib/Parser/Parser.cpp +++ b/mlir/lib/Parser/Parser.cpp @@ -863,8 +863,8 @@ class CustomOpAsmParser : public OpAsmParser { /// Emit a diagnostic at the specified location and return failure. InFlightDiagnostic emitError(llvm::SMLoc loc, const Twine &message) override { emittedError = true; - return parser.emitError(loc, "custom op '" + opDefinition->name + "' " + - message); + return parser.emitError(loc, "custom op '" + opDefinition->name.strref() + + "' " + message); } llvm::SMLoc getCurrentLocation() override { From 5d1fe3f903b9f46b994956f3b214305be119c4e2 Mon Sep 17 00:00:00 2001 From: Albion Fung Date: Wed, 2 Sep 2020 14:16:09 -0500 Subject: [PATCH 033/465] [PowerPC] Implemented Vector Multiply Builtins This patch implements the builtins for Vector Multiply Builtins (vmulxxd family of instructions), and adds the appropriate test cases for these builtins. The builtins utilize the vector multiply instructions itnroduced with ISA 3.1. 
Differential Revision: https://reviews.llvm.org/D83955 --- clang/include/clang/Basic/BuiltinsPPC.def | 5 ++ clang/lib/Headers/altivec.h | 50 ++++++++++++++++++ clang/test/CodeGen/builtins-ppc-p10vector.c | 38 ++++++++++++++ llvm/include/llvm/IR/IntrinsicsPowerPC.td | 14 +++++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 23 +++++--- .../CodeGen/PowerPC/p10-vector-multiply.ll | 52 +++++++++++++++++++ 6 files changed, 175 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index b9824588939b2..57ef39980c9b6 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -100,6 +100,11 @@ BUILTIN(__builtin_altivec_vmulouh, "V4UiV8UsV8Us", "") BUILTIN(__builtin_altivec_vmulosh, "V4SiV8SsV8Ss", "") BUILTIN(__builtin_altivec_vmulouw, "V2ULLiV4UiV4Ui", "") BUILTIN(__builtin_altivec_vmulosw, "V2SLLiV4SiV4Si", "") +BUILTIN(__builtin_altivec_vmuleud, "V1ULLLiV2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vmulesd, "V1SLLLiV2SLLiV2SLLi", "") +BUILTIN(__builtin_altivec_vmuloud, "V1ULLLiV2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vmulosd, "V1SLLLiV2SLLiV2SLLi", "") +BUILTIN(__builtin_altivec_vmsumcud, "V1ULLLiV2ULLiV2ULLiV1ULLLi", "") BUILTIN(__builtin_altivec_vnmsubfp, "V4fV4fV4fV4f", "") diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 927f25751664a..47119d7026838 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -5487,6 +5487,16 @@ vec_msum(vector unsigned short __a, vector unsigned short __b, return __builtin_altivec_vmsumuhm(__a, __b, __c); } +/* vec_msumc */ + +#ifdef __POWER10_VECTOR__ +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_msumc(vector unsigned long long __a, vector unsigned long long __b, + vector unsigned __int128 __c) { + return __builtin_altivec_vmsumcud(__a, __b, __c); +} +#endif + /* vec_vmsummbm */ static __inline__ vector int __attribute__((__always_inline__)) @@ 
-5713,6 +5723,26 @@ vec_mule(vector unsigned int __a, vector unsigned int __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_mule(vector signed long long __a, vector signed long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmulosd(__a, __b); +#else + return __builtin_altivec_vmulesd(__a, __b); +#endif +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_mule(vector unsigned long long __a, vector unsigned long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmuloud(__a, __b); +#else + return __builtin_altivec_vmuleud(__a, __b); +#endif +} +#endif + /* vec_vmulesb */ static __inline__ vector short __attribute__((__always_inline__)) @@ -5839,6 +5869,26 @@ vec_mulo(vector unsigned int __a, vector unsigned int __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_mulo(vector signed long long __a, vector signed long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmulesd(__a, __b); +#else + return __builtin_altivec_vmulosd(__a, __b); +#endif +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_mulo(vector unsigned long long __a, vector unsigned long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmuleud(__a, __b); +#else + return __builtin_altivec_vmuloud(__a, __b); +#endif +} +#endif + /* vec_vmulosb */ static __inline__ vector short __attribute__((__always_inline__)) diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c index 6fe6d9fdf72d6..ac766e264b2da 100644 --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -928,6 +928,44 @@ int test_vec_test_lsbb_all_zeros(void) { return vec_test_lsbb_all_zeros(vuca); } +vector unsigned __int128 test_vec_mule_u128(void) { + // CHECK-BE: @llvm.ppc.altivec.vmuleud(<2 x i64> + // CHECK-BE-NEXT: ret <1 x i128> + // CHECK-LE: 
@llvm.ppc.altivec.vmuloud(<2 x i64> + // CHECK-LE-NEXT: ret <1 x i128> + return vec_mule(vulla, vullb); +} + +vector signed __int128 test_vec_mule_s128(void) { + // CHECK-BE: @llvm.ppc.altivec.vmulesd(<2 x i64> + // CHECK-BE-NEXT: ret <1 x i128> + // CHECK-LE: @llvm.ppc.altivec.vmulosd(<2 x i64> + // CHECK-LE-NEXT: ret <1 x i128> + return vec_mule(vslla, vsllb); +} + +vector unsigned __int128 test_vec_mulo_u128(void) { + // CHECK-BE: @llvm.ppc.altivec.vmuloud(<2 x i64> + // CHECK-BE-NEXT: ret <1 x i128> + // CHECK-LE: @llvm.ppc.altivec.vmuleud(<2 x i64> + // CHECK-LE-NEXT: ret <1 x i128> + return vec_mulo(vulla, vullb); +} + +vector signed __int128 test_vec_mulo_s128(void) { + // CHECK-BE: @llvm.ppc.altivec.vmulosd(<2 x i64> + // CHECK-BE-NEXT: ret <1 x i128> + // CHECK-LE: @llvm.ppc.altivec.vmulesd(<2 x i64> + // CHECK-LE-NEXT: ret <1 x i128> + return vec_mulo(vslla, vsllb); +} + +vector unsigned __int128 test_vec_msumc_u128(void) { + // CHECK: @llvm.ppc.altivec.vmsumcud(<2 x i64> + // CHECK-NEXT: ret <1 x i128> + return vec_msumc(vulla, vullb, vui128a); +} + vector signed __int128 test_vec_xl_sext_i8(void) { // CHECK: load i8 // CHECK: sext i8 diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 1ef44b735c9fc..4ead968a19752 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -193,6 +193,13 @@ class PowerPC_Vec_QQQ_Intrinsic [llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; +/// PowerPC_Vec_QDD_Intrinsic - A PowerPC intrinsic that takes two v2i64 +/// vectors and returns one v1i128. These intrinsics have no side effects. +class PowerPC_Vec_QDD_Intrinsic + : PowerPC_Vec_Intrinsic; + //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Class Definitions. // @@ -673,6 +680,9 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>; // Vector Multiply Instructions. def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">, @@ -684,6 +694,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmulesd : PowerPC_Vec_QDD_Intrinsic<"vmulesd">; def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -693,6 +704,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmuleud : PowerPC_Vec_QDD_Intrinsic<"vmuleud">; def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], @@ -703,6 +715,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmulosd : PowerPC_Vec_QDD_Intrinsic<"vmulosd">; def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -712,6 +725,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmuloud : PowerPC_Vec_QDD_Intrinsic<"vmuloud">; // Vector Sum Instructions. def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">, diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 81455adbd0b7b..8032656135a44 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1256,16 +1256,25 @@ let Predicates = [IsISA3_1] in { } def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulesd $vD, $vA, $vB", IIC_VecGeneral, []>; + "vmulesd $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA, + v2i64:$vB))]>; def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmuleud $vD, $vA, $vB", IIC_VecGeneral, []>; + "vmuleud $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA, + v2i64:$vB))]>; def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmulosd $vD, $vA, $vB", IIC_VecGeneral, []>; + "vmulosd $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA, + v2i64:$vB))]>; def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmuloud $vD, $vA, $vB", IIC_VecGeneral, []>; - def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), - (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), - "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, []>; + "vmuloud $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA, + v2i64:$vB))]>; + def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vmsumcud + v2i64:$vA, v2i64:$vB, v1i128:$vC))]>; def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vdivsq $vD, $vA, 
$vB", IIC_VecGeneral, []>; def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll index fd58654d0ae1e..722a4de860c74 100644 --- a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll +++ b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll @@ -10,6 +10,7 @@ ; This includes the low order and high order versions of vector multiply. ; The low order version operates on doublewords, whereas the high order version ; operates on signed and unsigned words and doublewords. +; This file also includes 128 bit vector multiply instructions. define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vmulld: @@ -122,3 +123,54 @@ entry: %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %mulh } + +declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone + +define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone { +; CHECK-LABEL: test_vmuleud: +; CHECK: # %bb.0: +; CHECK-NEXT: vmuleud v2, v2, v3 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y) + ret <1 x i128> %tmp +} + +define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone { +; CHECK-LABEL: test_vmuloud: +; CHECK: # %bb.0: +; CHECK-NEXT: vmuloud v2, v2, v3 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y) + ret <1 x i128> %tmp +} + +define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone { +; 
CHECK-LABEL: test_vmulesd: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulesd v2, v2, v3 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y) + ret <1 x i128> %tmp +} + +define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone { +; CHECK-LABEL: test_vmulosd: +; CHECK: # %bb.0: +; CHECK-NEXT: vmulosd v2, v2, v3 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y) + ret <1 x i128> %tmp +} + +define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone { +; CHECK-LABEL: test_vmsumcud: +; CHECK: # %bb.0: +; CHECK-NEXT: vmsumcud v2, v2, v3, v4 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) + ret <1 x i128> %tmp +} From 737a4501e815d8dd57e5095dbbbede500dfa8ccb Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Wed, 2 Sep 2020 21:20:33 +0200 Subject: [PATCH 034/465] Add constexpr to pair Reviewed By: #libc, ldionne Differential Revision: https://reviews.llvm.org/D80558 --- libcxx/docs/FeatureTestMacroTable.rst | 2 + libcxx/include/tuple | 2 +- libcxx/include/utility | 20 +- libcxx/include/version | 2 + .../utility.version.pass.cpp | 34 ++- .../version.version.pass.cpp | 20 ++ .../pairs.pair/assign_const_pair_U_V.pass.cpp | 72 +++-- .../pairs/pairs.pair/assign_pair.pass.cpp | 138 +++++----- .../pairs/pairs.pair/assign_rv_pair.pass.cpp | 139 +++++----- .../pairs.pair/assign_rv_pair_U_V.pass.cpp | 80 +++--- .../pairs/pairs.pair/const_pair_U_V.pass.cpp | 249 ++++++++++-------- .../pairs/pairs.pair/piecewise.pass.cpp | 28 +- .../utility/pairs/pairs.pair/swap.pass.cpp | 63 +++-- .../generate_feature_test_macro_components.py | 6 + 14 files changed, 478 insertions(+), 377 deletions(-) diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index a6867fb30a37c..1dbeb213f8ba8 100644 --- 
a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -184,6 +184,8 @@ Status ------------------------------------------------- ----------------- ``__cpp_lib_constexpr_swap_algorithms`` *unimplemented* ------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_utility `` ``201811L`` + ------------------------------------------------- ----------------- ``__cpp_lib_destroying_delete`` ``201806L`` ------------------------------------------------- ----------------- ``__cpp_lib_endian`` ``201907L`` diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 1f80b70759c78..c3c7db5ff1184 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -1393,7 +1393,7 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator, _Alloc> template template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair<_T1, _T2>::pair(piecewise_construct_t, tuple<_Args1...>& __first_args, tuple<_Args2...>& __second_args, __tuple_indices<_I1...>, __tuple_indices<_I2...>) diff --git a/libcxx/include/utility b/libcxx/include/utility index 7ac322bfe7102..13489de22c95a 100644 --- a/libcxx/include/utility +++ b/libcxx/include/utility @@ -499,7 +499,7 @@ struct _LIBCPP_TEMPLATE_VIS pair second(_VSTD::get<1>(_VSTD::forward<_Tuple>(__p))) {} template - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair(piecewise_construct_t __pc, tuple<_Args1...> __first_args, tuple<_Args2...> __second_args) _NOEXCEPT_((is_nothrow_constructible::value && @@ -508,7 +508,7 @@ struct _LIBCPP_TEMPLATE_VIS pair typename __make_tuple_indices::type(), typename __make_tuple_indices::type()) {} - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair& operator=(typename conditional< is_copy_assignable::value && is_copy_assignable::value, @@ -521,7 +521,7 @@ struct _LIBCPP_TEMPLATE_VIS pair return *this; } - _LIBCPP_INLINE_VISIBILITY + 
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair& operator=(typename conditional< is_move_assignable::value && is_move_assignable::value, @@ -537,7 +537,7 @@ struct _LIBCPP_TEMPLATE_VIS pair template ::template __enable_assign<_Tuple>() > = false> - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 pair& operator=(_Tuple&& __p) { first = _VSTD::get<0>(_VSTD::forward<_Tuple>(__p)); second = _VSTD::get<1>(_VSTD::forward<_Tuple>(__p)); @@ -545,7 +545,7 @@ struct _LIBCPP_TEMPLATE_VIS pair } #endif - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 void swap(pair& __p) _NOEXCEPT_(__is_nothrow_swappable::value && __is_nothrow_swappable::value) @@ -558,10 +558,10 @@ private: #ifndef _LIBCPP_CXX03_LANG template - _LIBCPP_INLINE_VISIBILITY - pair(piecewise_construct_t, - tuple<_Args1...>& __first_args, tuple<_Args2...>& __second_args, - __tuple_indices<_I1...>, __tuple_indices<_I2...>); + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + pair(piecewise_construct_t, + tuple<_Args1...>& __first_args, tuple<_Args2...>& __second_args, + __tuple_indices<_I1...>, __tuple_indices<_I2...>); #endif }; @@ -619,7 +619,7 @@ operator<=(const pair<_T1,_T2>& __x, const pair<_T1,_T2>& __y) } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 typename enable_if < __is_swappable<_T1>::value && diff --git a/libcxx/include/version b/libcxx/include/version index acedd03073cc5..dc53be3937c4c 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -42,6 +42,7 @@ __cpp_lib_concepts 201806L __cpp_lib_constexpr_misc 201811L __cpp_lib_constexpr_swap_algorithms 201806L +__cpp_lib_constexpr_utility 201811L __cpp_lib_destroying_delete 201806L __cpp_lib_enable_shared_from_this 201603L __cpp_lib_endian 201907L @@ -227,6 +228,7 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_concepts 201806L // # define __cpp_lib_constexpr_misc 201811L // # define 
__cpp_lib_constexpr_swap_algorithms 201806L +# define __cpp_lib_constexpr_utility 201811L # if _LIBCPP_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # define __cpp_lib_destroying_delete 201806L # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp index 1fd38627a715a..73762a4f12fa1 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp @@ -13,13 +13,14 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_as_const 201510L [C++17] - __cpp_lib_constexpr_misc 201811L [C++2a] - __cpp_lib_exchange_function 201304L [C++14] - __cpp_lib_integer_sequence 201304L [C++14] - __cpp_lib_to_chars 201611L [C++17] - __cpp_lib_tuples_by_type 201304L [C++14] +/* Constant Value + __cpp_lib_as_const 201510L [C++17] + __cpp_lib_constexpr_misc 201811L [C++2a] + __cpp_lib_constexpr_utility 201811L [C++2a] + __cpp_lib_exchange_function 201304L [C++14] + __cpp_lib_integer_sequence 201304L [C++14] + __cpp_lib_to_chars 201611L [C++17] + __cpp_lib_tuples_by_type 201304L [C++14] */ #include @@ -35,6 +36,10 @@ # error "__cpp_lib_constexpr_misc should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifdef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should not be defined before c++14" # endif @@ -61,6 +66,10 @@ # error "__cpp_lib_constexpr_misc should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifndef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function 
should be defined in c++14" # endif @@ -99,6 +108,10 @@ # error "__cpp_lib_constexpr_misc should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifndef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should be defined in c++17" # endif @@ -155,6 +168,13 @@ # endif # endif +# ifndef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should be defined in c++2a" +# endif +# if __cpp_lib_constexpr_utility != 201811L +# error "__cpp_lib_constexpr_utility should have the value 201811L in c++2a" +# endif + # ifndef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should be defined in c++2a" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp index 96a0fea6b9186..5e485a2f585e7 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp @@ -36,6 +36,7 @@ __cpp_lib_concepts 201806L [C++2a] __cpp_lib_constexpr_misc 201811L [C++2a] __cpp_lib_constexpr_swap_algorithms 201806L [C++2a] + __cpp_lib_constexpr_utility 201811L [C++2a] __cpp_lib_destroying_delete 201806L [C++2a] __cpp_lib_enable_shared_from_this 201603L [C++17] __cpp_lib_endian 201907L [C++2a] @@ -194,6 +195,10 @@ # error "__cpp_lib_constexpr_swap_algorithms should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++2a" # endif @@ -550,6 +555,10 @@ # error "__cpp_lib_constexpr_swap_algorithms should not be defined before c++2a" # endif +# ifdef 
__cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++2a" # endif @@ -1020,6 +1029,10 @@ # error "__cpp_lib_constexpr_swap_algorithms should not be defined before c++2a" # endif +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++2a" # endif @@ -1721,6 +1734,13 @@ # endif # endif +# ifndef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should be defined in c++2a" +# endif +# if __cpp_lib_constexpr_utility != 201811L +# error "__cpp_lib_constexpr_utility should have the value 201811L in c++2a" +# endif + # if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # ifndef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should be defined in c++2a" diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_const_pair_U_V.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_const_pair_U_V.pass.cpp index 078d424a1ba3e..becf36e66cf3a 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_const_pair_U_V.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_const_pair_U_V.pass.cpp @@ -20,33 +20,51 @@ #include "archetypes.h" #endif -int main(int, char**) -{ - { - typedef std::pair P1; - typedef std::pair P2; - P1 p1(3, static_cast(4)); - P2 p2; - p2 = p1; - assert(p2.first == 3); - assert(p2.second == 4); - } -#if TEST_STD_VER >= 11 - { - using C = TestTypes::TestType; - using P = std::pair; - using T = std::pair; - const T t(42, -42); - P p(101, 101); - C::reset_constructors(); - p = t; - assert(C::constructed == 0); - assert(C::assigned == 1); - assert(C::copy_assigned == 1); - 
assert(C::move_assigned == 0); - assert(p.first == 42); - assert(p.second.value == -42); - } +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P1; + typedef std::pair P2; + P1 p1(3, static_cast(4)); + P2 p2; + p2 = p1; + assert(p2.first == 3); + assert(p2.second == 4); + } +#if TEST_STD_VER >= 20 + { + using C = ConstexprTestTypes::TestType; + using P = std::pair; + using T = std::pair; + const T t(42, -42); + P p(101, 101); + p = t; + assert(p.first == 42); + assert(p.second.value == -42); + } +#elif TEST_STD_VER >= 11 + { + using C = TestTypes::TestType; + using P = std::pair; + using T = std::pair; + const T t(42, -42); + P p(101, 101); + C::reset_constructors(); + p = t; + assert(C::constructed == 0); + assert(C::assigned == 1); + assert(C::copy_assigned == 1); + assert(C::move_assigned == 0); + assert(p.first == 42); + assert(p.second.value == -42); + } +#endif + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); #endif return 0; diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp index 303aedf50969d..94e30aec3332c 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp @@ -19,85 +19,81 @@ #include #include "test_macros.h" - - -struct NonAssignable { - NonAssignable& operator=(NonAssignable const&) = delete; - NonAssignable& operator=(NonAssignable&&) = delete; -}; -struct CopyAssignable { - CopyAssignable() = default; - CopyAssignable(CopyAssignable const&) = default; - CopyAssignable& operator=(CopyAssignable const&) = default; - CopyAssignable& operator=(CopyAssignable&&) = delete; -}; -struct MoveAssignable { - MoveAssignable() = default; - MoveAssignable& operator=(MoveAssignable const&) = delete; - MoveAssignable& operator=(MoveAssignable&&) = default; -}; +#include "archetypes.h" 
struct CountAssign { - static int copied; - static int moved; - static void reset() { copied = moved = 0; } - CountAssign() = default; - CountAssign& operator=(CountAssign const&) { ++copied; return *this; } - CountAssign& operator=(CountAssign&&) { ++moved; return *this; } + int copied = 0; + int moved = 0; + TEST_CONSTEXPR_CXX20 CountAssign() = default; + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign const&) { + ++copied; + return *this; + } + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign&&) { + ++moved; + return *this; + } }; -int CountAssign::copied = 0; -int CountAssign::moved = 0; struct Incomplete; extern Incomplete inc_obj; -int main(int, char**) -{ - { - typedef std::pair P; - const P p1(CopyAssignable(), short{4}); - P p2; - p2 = p1; - assert(p2.second == 4); - } - { - using P = std::pair; - int x = 42; - int y = 101; - int x2 = -1; - int y2 = 300; - P p1(x, std::move(y)); - P p2(x2, std::move(y2)); - p1 = p2; - assert(p1.first == x2); - assert(p1.second == y2); - } - { - using P = std::pair; - static_assert(!std::is_copy_assignable

::value, ""); - } - { - CountAssign::reset(); - using P = std::pair; - static_assert(std::is_copy_assignable

::value, ""); - P p; - P p2; - p = p2; - assert(CountAssign::copied == 1); - assert(CountAssign::moved == 0); - } - { - using P = std::pair; - static_assert(!std::is_copy_assignable

::value, ""); - } - { - using P = std::pair; - static_assert(!std::is_copy_assignable

::value, ""); - P p(42, inc_obj); - assert(&p.second == &inc_obj); - } +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P; + const P p1(ConstexprTestTypes::CopyOnly(), short{4}); + P p2; + p2 = p1; + assert(p2.second == 4); + } + { + using P = std::pair; + int x = 42; + int y = 101; + int x2 = -1; + int y2 = 300; + P p1(x, std::move(y)); + P p2(x2, std::move(y2)); + p1 = p2; + assert(p1.first == x2); + assert(p1.second == y2); + } + { + using P = std::pair; + static_assert(!std::is_copy_assignable

::value, ""); + } + { + using P = std::pair; + static_assert(std::is_copy_assignable

::value, ""); + P p; + P p2; + p = p2; + assert(p.first.copied == 1); + assert(p.first.moved == 0); + assert(p2.first.copied == 0); + assert(p2.first.moved == 0); + } + { + using P = std::pair; + static_assert(!std::is_copy_assignable

::value, ""); + } + { + using P = std::pair; + static_assert(!std::is_copy_assignable

::value, ""); + P p(42, inc_obj); + assert(&p.second == &inc_obj); + } + + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif - return 0; + return 0; } struct Incomplete {}; diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp index 811e89015b485..cc4e4f5c1108e 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp @@ -19,81 +19,78 @@ #include #include "test_macros.h" - - -struct NonAssignable { - NonAssignable& operator=(NonAssignable const&) = delete; - NonAssignable& operator=(NonAssignable&&) = delete; -}; -struct CopyAssignable { - CopyAssignable() = default; - CopyAssignable& operator=(CopyAssignable const&) = default; - CopyAssignable& operator=(CopyAssignable&&) = delete; -}; -struct MoveAssignable { - MoveAssignable() = default; - MoveAssignable& operator=(MoveAssignable const&) = delete; - MoveAssignable& operator=(MoveAssignable&&) = default; -}; +#include "archetypes.h" struct CountAssign { - static int copied; - static int moved; - static void reset() { copied = moved = 0; } - CountAssign() = default; - CountAssign& operator=(CountAssign const&) { ++copied; return *this; } - CountAssign& operator=(CountAssign&&) { ++moved; return *this; } + int copied = 0; + int moved = 0; + TEST_CONSTEXPR_CXX20 CountAssign() = default; + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign const&) { + ++copied; + return *this; + } + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign&&) { + ++moved; + return *this; + } }; -int CountAssign::copied = 0; -int CountAssign::moved = 0; -int main(int, char**) -{ - { - typedef std::pair, int> P; - P p1(std::unique_ptr(new int(3)), 4); - P p2; - p2 = std::move(p1); - assert(*p2.first == 3); - assert(p2.second == 4); - } - { - using P = 
std::pair; - int x = 42; - int y = 101; - int x2 = -1; - int y2 = 300; - P p1(x, std::move(y)); - P p2(x2, std::move(y2)); - p1 = std::move(p2); - assert(p1.first == x2); - assert(p1.second == y2); - } - { - using P = std::pair; - static_assert(!std::is_move_assignable

::value, ""); - } - { - // The move decays to the copy constructor - CountAssign::reset(); - using P = std::pair; - static_assert(std::is_move_assignable

::value, ""); - P p; - P p2; - p = std::move(p2); - assert(CountAssign::moved == 0); - assert(CountAssign::copied == 1); - } - { - CountAssign::reset(); - using P = std::pair; - static_assert(std::is_move_assignable

::value, ""); - P p; - P p2; - p = std::move(p2); - assert(CountAssign::moved == 1); - assert(CountAssign::copied == 0); - } +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P; + P p1(3, 4); + P p2; + p2 = std::move(p1); + assert(p2.first.value == 3); + assert(p2.second == 4); + } + { + using P = std::pair; + int x = 42; + int y = 101; + int x2 = -1; + int y2 = 300; + P p1(x, std::move(y)); + P p2(x2, std::move(y2)); + p1 = std::move(p2); + assert(p1.first == x2); + assert(p1.second == y2); + } + { + using P = std::pair; + static_assert(!std::is_move_assignable

::value, ""); + } + { + // The move decays to the copy constructor + using P = std::pair; + static_assert(std::is_move_assignable

::value, ""); + P p; + P p2; + p = std::move(p2); + assert(p.first.moved == 0); + assert(p.first.copied == 1); + assert(p2.first.moved == 0); + assert(p2.first.copied == 0); + } + { + using P = std::pair; + static_assert(std::is_move_assignable

::value, ""); + P p; + P p2; + p = std::move(p2); + assert(p.first.moved == 1); + assert(p.first.copied == 0); + assert(p2.first.moved == 0); + assert(p2.first.copied == 0); + } + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp index ed56480b2f77d..7267f7a2ef8b2 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp @@ -17,46 +17,58 @@ #include #include #include -#include #include "test_macros.h" +#include "archetypes.h" -struct Base -{ - virtual ~Base() {} +struct Derived : ConstexprTestTypes::MoveOnly { + Derived() = default; + TEST_CONSTEXPR_CXX20 Derived(ConstexprTestTypes::MoveOnly&&){}; }; - -struct Derived - : public Base -{ +struct CountAssign { + int copied = 0; + int moved = 0; + TEST_CONSTEXPR_CXX20 CountAssign() = default; + TEST_CONSTEXPR_CXX20 CountAssign(const int){}; + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign const&) { + ++copied; + return *this; + } + TEST_CONSTEXPR_CXX20 CountAssign& operator=(CountAssign&&) { + ++moved; + return *this; + } }; -int main(int, char**) -{ - { - typedef std::pair, short> P1; - typedef std::pair, long> P2; - P1 p1(std::unique_ptr(), static_cast(4)); - P2 p2; - p2 = std::move(p1); - assert(p2.first == nullptr); - assert(p2.second == 4); - } - { - using C = TestTypes::TestType; - using P = std::pair; - using T = std::pair; - T t(42, -42); - P p(101, 101); - C::reset_constructors(); - p = std::move(t); - assert(C::constructed == 0); - assert(C::assigned == 1); - assert(C::copy_assigned == 0); - assert(C::move_assigned == 1); - assert(p.first == 42); - assert(p.second.value == -42); - } +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair 
P1; + typedef std::pair P2; + P1 p1(Derived(), static_cast(4)); + P2 p2; + p2 = std::move(p1); + assert(p2.second == 4); + } + { + using P = std::pair; + using T = std::pair; + T t(42, -42); + P p(101, 101); + p = std::move(t); + assert(p.first == 42); + assert(p.second.moved == 1); + assert(p.second.copied == 0); + assert(t.second.moved == 0); + assert(t.second.copied == 0); + } + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp index 358689ed0b97f..42ddb4e6058b2 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp @@ -23,18 +23,16 @@ #include "test_macros.h" using namespace ImplicitTypes; // Get implicitly archetypes -template -void test_pair_const() -{ - using P1 = std::pair; - using P2 = std::pair; - using UP1 = std::pair const&; - using UP2 = std::pair const&; - static_assert(std::is_constructible::value == CanCopy, ""); - static_assert(test_convertible() == CanConvert, ""); - static_assert(std::is_constructible::value == CanCopy, ""); - static_assert(test_convertible() == CanConvert, ""); +template +TEST_CONSTEXPR_CXX20 void test_pair_const() { + using P1 = std::pair; + using P2 = std::pair; + using UP1 = std::pair const&; + using UP2 = std::pair const&; + static_assert(std::is_constructible::value == CanCopy, ""); + static_assert(test_convertible() == CanConvert, ""); + static_assert(std::is_constructible::value == CanCopy, ""); + static_assert(test_convertible() == CanConvert, ""); } template @@ -55,104 +53,115 @@ struct ImplicitT { int value; }; -int main(int, char**) -{ - { - typedef std::pair P1; - typedef std::pair P2; - const P1 p1(3, 4); - const P2 p2 = p1; - assert(p2.first == 3); - assert(p2.second == 
4); - } - { - // We allow derived types to use this constructor - using P1 = DPair; - using P2 = std::pair; - P1 p1(42, 101); - P2 p2(p1); - assert(p2.first == 42); - assert(p2.second == 101); - } - { - test_pair_const(); // copy construction - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); // copy construction - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); // copy construction - test_pair_const(); - test_pair_const(); - - test_pair_const(); // copy construction - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - } - - { // Test construction of references - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - // Unfortunately the below conversions are allowed and create dangling - // references. - //test_pair_const(); - //test_pair_const(); - //test_pair_const(); - // But these are not because the converting constructor is explicit. 
- test_pair_const(); - test_pair_const(); - test_pair_const(); - - } - { - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - - test_pair_const(); - test_pair_const(); - test_pair_const(); - test_pair_const(); - } +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P1; + typedef std::pair P2; + const P1 p1(3, 4); + const P2 p2 = p1; + assert(p2.first == 3); + assert(p2.second == 4); + } + { + // We allow derived types to use this constructor + using P1 = DPair; + using P2 = std::pair; + P1 p1(42, 101); + P2 p2(p1); + assert(p2.first == 42); + assert(p2.second == 101); + } + { + test_pair_const(); // copy construction + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); // copy construction + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); // copy construction + test_pair_const(); + test_pair_const(); + + test_pair_const(); // copy construction + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + } + + { // Test construction of references + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + // Unfortunately the below conversions are allowed and create dangling + // references. + //test_pair_const(); + //test_pair_const(); + //test_pair_const(); + // But these are not because the converting constructor is explicit. 
+ test_pair_const(); + test_pair_const(); + test_pair_const(); + } + { + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + + test_pair_const(); + test_pair_const(); + test_pair_const(); + test_pair_const(); + } #if TEST_STD_VER > 11 { typedef std::pair P1; @@ -171,14 +180,22 @@ int main(int, char**) static_assert(p2.second.value == 101, ""); } { - using P1 = std::pair; - using P2 = std::pair; - constexpr P1 p1(42, 101); - constexpr P2 p2 = p1; - static_assert(p2.first.value == 42, ""); - static_assert(p2.second.value == 101, ""); + using P1 = std::pair; + using P2 = std::pair; + constexpr P1 p1(42, 101); + constexpr P2 p2 = p1; + static_assert(p2.first.value == 42, ""); + static_assert(p2.second.value == 101, ""); } #endif + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp index 203209d5e6346..37bbb9501769c 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp @@ -22,18 +22,24 @@ #include "test_macros.h" +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P1; + typedef std::pair P2; + typedef std::pair P3; + P3 p3(std::piecewise_construct, std::tuple(3, nullptr), + std::tuple(nullptr, 4)); + assert(p3.first == P1(3, nullptr)); + assert(p3.second == P2(nullptr, 4)); + } + return true; +} -int main(int, char**) -{ - { - typedef std::pair P1; - typedef std::pair P2; - typedef std::pair P3; - P3 p3(std::piecewise_construct, std::tuple(3, nullptr), - std::tuple(nullptr, 4)); - assert(p3.first 
== P1(3, nullptr)); - assert(p3.second == P2(nullptr, 4)); - } +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp index 9a497e5ac532d..a2d720ff42ea6 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp @@ -19,35 +19,40 @@ struct S { int i; - S() : i(0) {} - S(int j) : i(j) {} - S * operator& () { assert(false); return this; } - S const * operator& () const { assert(false); return this; } - bool operator==(int x) const { return i == x; } - }; - -int main(int, char**) -{ - { - typedef std::pair P1; - P1 p1(3, static_cast(4)); - P1 p2(5, static_cast(6)); - p1.swap(p2); - assert(p1.first == 5); - assert(p1.second == 6); - assert(p2.first == 3); - assert(p2.second == 4); - } - { - typedef std::pair P1; - P1 p1(3, S(4)); - P1 p2(5, S(6)); - p1.swap(p2); - assert(p1.first == 5); - assert(p1.second == 6); - assert(p2.first == 3); - assert(p2.second == 4); - } + TEST_CONSTEXPR_CXX20 S() : i(0) {} + TEST_CONSTEXPR_CXX20 S(int j) : i(j) {} + TEST_CONSTEXPR_CXX20 bool operator==(int x) const { return i == x; } +}; + +TEST_CONSTEXPR_CXX20 bool test() { + { + typedef std::pair P1; + P1 p1(3, static_cast(4)); + P1 p2(5, static_cast(6)); + p1.swap(p2); + assert(p1.first == 5); + assert(p1.second == 6); + assert(p2.first == 3); + assert(p2.second == 4); + } + { + typedef std::pair P1; + P1 p1(3, S(4)); + P1 p2(5, S(6)); + p1.swap(p2); + assert(p1.first == 5); + assert(p1.second == 6); + assert(p2.first == 3); + assert(p2.second == 4); + } + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 
b77f88489d9d5..88241614420aa 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -607,6 +607,12 @@ def add_version_header(tc): "depends": "defined(__cpp_concepts) && __cpp_concepts >= 201811L", "internal_depends": "defined(__cpp_concepts) && __cpp_concepts >= 201811L", }, + {"name": "__cpp_lib_constexpr_utility ", + "values": { + "c++2a": int(201811), + }, + "headers": ["utility"], + }, ]], key=lambda tc: tc["name"]) def get_std_dialects(): From 1cfde143e82aeb47cffba436ba7b5302d8e14193 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 2 Sep 2020 12:21:06 -0700 Subject: [PATCH 035/465] [GCDAProfiling] Suppress -Wprio-ctor-dtor for GCC>=9 and remove unused write_string/length_of_string The `__attribute__((destructor(100)))` diagnostic does not have a warning option in GCC 8 (before r264853) and thus cannot be suppressed. --- compiler-rt/lib/profile/GCDAProfiling.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c index fa4b951383324..d57fdbae5371d 100644 --- a/compiler-rt/lib/profile/GCDAProfiling.c +++ b/compiler-rt/lib/profile/GCDAProfiling.c @@ -210,22 +210,6 @@ static void write_64bit_value(uint64_t i) { write_32bit_value(hi); } -static uint32_t length_of_string(const char *s) { - return (strlen(s) / 4) + 1; -} - -// Remove when we support libgcov 9 current_working_directory. 
-#if !defined(_MSC_VER) && defined(__clang__) -__attribute__((unused)) -#endif -static void -write_string(const char *s) { - uint32_t len = length_of_string(s); - write_32bit_value(len); - write_bytes(s, strlen(s)); - write_bytes("\0\0\0\0", 4 - (strlen(s) % 4)); -} - static uint32_t read_32bit_value() { uint32_t val; @@ -632,6 +616,9 @@ void llvm_writeout_files(void) { // __attribute__((destructor)) and destructors whose priorities are greater than // 100 run before this function and can thus be tracked. The priority is // compatible with GCC 7 onwards. +#if __GNUC__ >= 9 +#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" +#endif __attribute__((destructor(100))) #endif static void llvm_writeout_and_clear(void) { From 6f0a3711bc15f8b50ad56d64eee70d9ba62f70c6 Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Wed, 2 Sep 2020 18:47:29 +0000 Subject: [PATCH 036/465] [libTooling] Restore defaults for matchers in makeRule. This patch restores the default traversal for Transformer's `makeRule` to `TK_AsIs`. The implicit mode has proven problematic. Differential Revision: https://reviews.llvm.org/D87048 --- clang/lib/Tooling/Transformer/RewriteRule.cpp | 7 ++-- clang/unittests/Tooling/TransformerTest.cpp | 34 ++----------------- 2 files changed, 6 insertions(+), 35 deletions(-) diff --git a/clang/lib/Tooling/Transformer/RewriteRule.cpp b/clang/lib/Tooling/Transformer/RewriteRule.cpp index fe33f9cf8b0ca..594e22f56b874 100644 --- a/clang/lib/Tooling/Transformer/RewriteRule.cpp +++ b/clang/lib/Tooling/Transformer/RewriteRule.cpp @@ -345,14 +345,13 @@ transformer::detail::buildMatchers(const RewriteRule &Rule) { // Each anyOf explicitly controls the traversal kind. The anyOf itself is set // to `TK_AsIs` to ensure no nodes are skipped, thereby deferring to the kind // of the branches. Then, each branch is either left as is, if the kind is - // already set, or explicitly set to `TK_IgnoreUnlessSpelledInSource`. 
We - // choose this setting, because we think it is the one most friendly to - // beginners, who are (largely) the target audience of Transformer. + // already set, or explicitly set to `TK_AsIs`. We choose this setting because + // it is the default interpretation of matchers. std::vector Matchers; for (const auto &Bucket : Buckets) { DynTypedMatcher M = DynTypedMatcher::constructVariadic( DynTypedMatcher::VO_AnyOf, Bucket.first, - taggedMatchers("Tag", Bucket.second, TK_IgnoreUnlessSpelledInSource)); + taggedMatchers("Tag", Bucket.second, TK_AsIs)); M.setAllowBind(true); // `tryBind` is guaranteed to succeed, because `AllowBind` was set to true. Matchers.push_back(M.tryBind(RootID)->withTraversalKind(TK_AsIs)); diff --git a/clang/unittests/Tooling/TransformerTest.cpp b/clang/unittests/Tooling/TransformerTest.cpp index 26158b1520f90..2c9bd7dfd32de 100644 --- a/clang/unittests/Tooling/TransformerTest.cpp +++ b/clang/unittests/Tooling/TransformerTest.cpp @@ -878,10 +878,8 @@ TEST_F(TransformerTest, OrderedRuleMultipleKinds) { } // Verifies that a rule with a top-level matcher for an implicit node (like -// `implicitCastExpr`) does not change the code, when the AST traversal skips -// implicit nodes. In this test, only the rule with the explicit-node matcher -// will fire. -TEST_F(TransformerTest, OrderedRuleImplicitIgnored) { +// `implicitCastExpr`) works correctly -- the implicit nodes are not skipped. +TEST_F(TransformerTest, OrderedRuleImplicitMatched) { std::string Input = R"cc( void f1(); int f2(); @@ -892,7 +890,7 @@ TEST_F(TransformerTest, OrderedRuleImplicitIgnored) { void f1(); int f2(); void call_f1() { REPLACE_F1; } - float call_f2() { return f2(); } + float call_f2() { return REPLACE_F2; } )cc"; RewriteRule ReplaceF1 = @@ -904,32 +902,6 @@ TEST_F(TransformerTest, OrderedRuleImplicitIgnored) { testRule(applyFirst({ReplaceF1, ReplaceF2}), Input, Expected); } -// Verifies that explicitly setting the traversal kind fixes the problem in the -// previous test. 
-TEST_F(TransformerTest, OrderedRuleImplicitMatched) { - std::string Input = R"cc( - void f1(); - int f2(); - void call_f1() { f1(); } - float call_f2() { return f2(); } - )cc"; - std::string Expected = R"cc( - void f1(); - int f2(); - void call_f1() { REPLACE_F1; } - float call_f2() { return REPLACE_F2; } - )cc"; - - RewriteRule ReplaceF1 = makeRule( - traverse(clang::TK_AsIs, callExpr(callee(functionDecl(hasName("f1"))))), - changeTo(cat("REPLACE_F1"))); - RewriteRule ReplaceF2 = - makeRule(traverse(clang::TK_AsIs, - implicitCastExpr(hasSourceExpression(callExpr()))), - changeTo(cat("REPLACE_F2"))); - testRule(applyFirst({ReplaceF1, ReplaceF2}), Input, Expected); -} - // // Negative tests (where we expect no transformation to occur). // From 0e86f390457a2b4dd1f2d1770db912963a36f240 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Wed, 2 Sep 2020 21:31:09 +0200 Subject: [PATCH 037/465] [lldb/test] Fix TestPlatform*.py Windows failures (NFC) This patch fixes the windows failures introduced by `addb514`: http://lab.llvm.org:8011/builders/lldb-x64-windows-ninja/builds/18671/steps/test/logs/stdio This macro, used in the test to check the platform, was missing a `_`, making the test behave like it was run from a UNIX platform. 
Signed-off-by: Med Ismail Bennani --- lldb/test/API/commands/platform/basic/myshell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/API/commands/platform/basic/myshell.c b/lldb/test/API/commands/platform/basic/myshell.c index d1c0eecb943e9..8fef648de651c 100644 --- a/lldb/test/API/commands/platform/basic/myshell.c +++ b/lldb/test/API/commands/platform/basic/myshell.c @@ -8,7 +8,7 @@ int main(int argc, char *argv[]) { exit(1); } -#ifdef WIN32 +#if defined(_WIN32) || defined(_WIN64) char *cmd_opt = "/C"; #else char *cmd_opt = "-c"; From afe8ca867337f5c0e6d1665096e400686a64bef9 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Wed, 2 Sep 2020 21:42:56 +0200 Subject: [PATCH 038/465] [libcxx] Fix whitespace error --- libcxx/docs/FeatureTestMacroTable.rst | 2 +- .../utility.version.pass.cpp | 36 +++++++++---------- .../version.version.pass.cpp | 20 +++++------ .../generate_feature_test_macro_components.py | 2 +- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 1dbeb213f8ba8..f5c6e5b8251aa 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -184,7 +184,7 @@ Status ------------------------------------------------- ----------------- ``__cpp_lib_constexpr_swap_algorithms`` *unimplemented* ------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_utility `` ``201811L`` + ``__cpp_lib_constexpr_utility`` ``201811L`` ------------------------------------------------- ----------------- ``__cpp_lib_destroying_delete`` ``201806L`` ------------------------------------------------- ----------------- diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp index 73762a4f12fa1..e595a99a749ff 100644 --- 
a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.pass.cpp @@ -13,14 +13,14 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_as_const 201510L [C++17] - __cpp_lib_constexpr_misc 201811L [C++2a] - __cpp_lib_constexpr_utility 201811L [C++2a] - __cpp_lib_exchange_function 201304L [C++14] - __cpp_lib_integer_sequence 201304L [C++14] - __cpp_lib_to_chars 201611L [C++17] - __cpp_lib_tuples_by_type 201304L [C++14] +/* Constant Value + __cpp_lib_as_const 201510L [C++17] + __cpp_lib_constexpr_misc 201811L [C++2a] + __cpp_lib_constexpr_utility 201811L [C++2a] + __cpp_lib_exchange_function 201304L [C++14] + __cpp_lib_integer_sequence 201304L [C++14] + __cpp_lib_to_chars 201611L [C++17] + __cpp_lib_tuples_by_type 201304L [C++14] */ #include @@ -36,8 +36,8 @@ # error "__cpp_lib_constexpr_misc should not be defined before c++2a" # endif -# ifdef __cpp_lib_constexpr_utility -# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" # endif # ifdef __cpp_lib_exchange_function @@ -66,8 +66,8 @@ # error "__cpp_lib_constexpr_misc should not be defined before c++2a" # endif -# ifdef __cpp_lib_constexpr_utility -# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" # endif # ifndef __cpp_lib_exchange_function @@ -108,8 +108,8 @@ # error "__cpp_lib_constexpr_misc should not be defined before c++2a" # endif -# ifdef __cpp_lib_constexpr_utility -# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" # endif # ifndef __cpp_lib_exchange_function @@ 
-168,11 +168,11 @@ # endif # endif -# ifndef __cpp_lib_constexpr_utility -# error "__cpp_lib_constexpr_utility should be defined in c++2a" +# ifndef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should be defined in c++2a" # endif -# if __cpp_lib_constexpr_utility != 201811L -# error "__cpp_lib_constexpr_utility should have the value 201811L in c++2a" +# if __cpp_lib_constexpr_utility != 201811L +# error "__cpp_lib_constexpr_utility should have the value 201811L in c++2a" # endif # ifndef __cpp_lib_exchange_function diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp index 5e485a2f585e7..afbee586df3c6 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp @@ -195,8 +195,8 @@ # error "__cpp_lib_constexpr_swap_algorithms should not be defined before c++2a" # endif -# ifdef __cpp_lib_constexpr_utility -# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" # endif # ifdef __cpp_lib_destroying_delete @@ -555,8 +555,8 @@ # error "__cpp_lib_constexpr_swap_algorithms should not be defined before c++2a" # endif -# ifdef __cpp_lib_constexpr_utility -# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# ifdef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" # endif # ifdef __cpp_lib_destroying_delete @@ -1029,8 +1029,8 @@ # error "__cpp_lib_constexpr_swap_algorithms should not be defined before c++2a" # endif -# ifdef __cpp_lib_constexpr_utility -# error "__cpp_lib_constexpr_utility should not be defined before c++2a" +# ifdef 
__cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should not be defined before c++2a" # endif # ifdef __cpp_lib_destroying_delete @@ -1734,11 +1734,11 @@ # endif # endif -# ifndef __cpp_lib_constexpr_utility -# error "__cpp_lib_constexpr_utility should be defined in c++2a" +# ifndef __cpp_lib_constexpr_utility +# error "__cpp_lib_constexpr_utility should be defined in c++2a" # endif -# if __cpp_lib_constexpr_utility != 201811L -# error "__cpp_lib_constexpr_utility should have the value 201811L in c++2a" +# if __cpp_lib_constexpr_utility != 201811L +# error "__cpp_lib_constexpr_utility should have the value 201811L in c++2a" # endif # if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 88241614420aa..6ad1a18569893 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -607,7 +607,7 @@ def add_version_header(tc): "depends": "defined(__cpp_concepts) && __cpp_concepts >= 201811L", "internal_depends": "defined(__cpp_concepts) && __cpp_concepts >= 201811L", }, - {"name": "__cpp_lib_constexpr_utility ", + {"name": "__cpp_lib_constexpr_utility", "values": { "c++2a": int(201811), }, From 5b4744b2c5af7b4ee564eacf81f812f371c52683 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Wed, 2 Sep 2020 16:03:01 -0400 Subject: [PATCH 039/465] Make sure that llvm-ml uses MASM integer lexing when in --as-lex mode --- llvm/tools/llvm-ml/llvm-ml.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp index 2d4a3afdc1f93..5abf22d6d6ddf 100644 --- a/llvm/tools/llvm-ml/llvm-ml.cpp +++ b/llvm/tools/llvm-ml/llvm-ml.cpp @@ -175,6 +175,7 @@ static std::unique_ptr GetOutputStream(StringRef Path) { static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, raw_ostream 
&OS) { AsmLexer Lexer(MAI); Lexer.setBuffer(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer()); + Lexer.setLexMasmIntegers(true); bool Error = false; while (Lexer.Lex().isNot(AsmToken::Eof)) { From c963757783d7cd8596d7f9cd814f01338458c496 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Wed, 2 Sep 2020 10:37:48 -0700 Subject: [PATCH 040/465] [flang] Implement nonstandard OPEN statement CARRIAGECONTROL specifier Differential Revision: https://reviews.llvm.org/D87052 --- flang/docs/Extensions.md | 1 + flang/docs/f2018-grammar.txt | 6 +++-- flang/include/flang/Common/Fortran-features.h | 16 +++++------ flang/include/flang/Common/Fortran.h | 1 + flang/include/flang/Parser/parse-tree.h | 10 ++++--- flang/lib/Lower/IO.cpp | 15 ++++++----- flang/lib/Parser/io-parsers.cpp | 17 +++++++++--- flang/lib/Semantics/check-io.cpp | 14 ++++++++++ flang/runtime/io-api.cpp | 27 ++++++++++++++++++- flang/runtime/io-api.h | 2 ++ flang/runtime/io-stmt.cpp | 5 ++++ flang/test/Semantics/io01.f90 | 7 +++++ flang/test/Semantics/io05.f90 | 1 + 13 files changed, 98 insertions(+), 24 deletions(-) diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index a3260400a9bf7..027927f67dfd4 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -56,6 +56,7 @@ Extensions, deletions, and legacy features supported by default * `NAME=` as synonym for `FILE=` * Data edit descriptors without width or other details * `D` lines in fixed form as comments or debug code +* `CARRIAGECONTROL=` on the OPEN and INQUIRE statements * `CONVERT=` on the OPEN and INQUIRE statements * `DISPOSE=` on the OPEN and INQUIRE statements * Leading semicolons are ignored before any statement that diff --git a/flang/docs/f2018-grammar.txt b/flang/docs/f2018-grammar.txt index 2de8cdfc1b8f7..9b2819d69c724 100644 --- a/flang/docs/f2018-grammar.txt +++ b/flang/docs/f2018-grammar.txt @@ -577,7 +577,8 @@ R1205 connect-spec -> POSITION = scalar-default-char-expr | RECL = scalar-int-expr | ROUND = 
scalar-default-char-expr | SIGN = scalar-default-char-expr | STATUS = scalar-default-char-expr - @ | CONVERT = scalar-default-char-expr + @ | CARRIAGECONTROL = scalar-default-char-expr + | CONVERT = scalar-default-char-expr | DISPOSE = scalar-default-char-expr R1206 file-name-expr -> scalar-default-char-expr R1207 iomsg-variable -> scalar-default-char-variable @@ -657,7 +658,8 @@ R1231 inquire-spec -> STREAM = scalar-default-char-variable | STATUS = scalar-default-char-variable | WRITE = scalar-default-char-variable - @ | CONVERT = scalar-default-char-expr + @ | CARRIAGECONTROL = scalar-default-char-expr + | CONVERT = scalar-default-char-expr | DISPOSE = scalar-default-char-expr R1301 format-stmt -> FORMAT format-specification diff --git a/flang/include/flang/Common/Fortran-features.h b/flang/include/flang/Common/Fortran-features.h index ebf7a8d9d6230..23c2e95fd5648 100644 --- a/flang/include/flang/Common/Fortran-features.h +++ b/flang/include/flang/Common/Fortran-features.h @@ -22,14 +22,14 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines, DoubleComplex, Byte, StarKind, QuadPrecision, SlashInitialization, TripletInArrayConstructor, MissingColons, SignedComplexLiteral, OldStyleParameter, ComplexConstructor, PercentLOC, SignedPrimary, FileName, - Convert, Dispose, IOListLeadingComma, AbbreviatedEditDescriptor, - ProgramParentheses, PercentRefAndVal, OmitFunctionDummies, CrayPointer, - Hollerith, ArithmeticIF, Assign, AssignedGOTO, Pause, OpenACC, OpenMP, - CruftAfterAmpersand, ClassicCComments, AdditionalFormats, BigIntLiterals, - RealDoControls, EquivalenceNumericWithCharacter, AdditionalIntrinsics, - AnonymousParents, OldLabelDoEndStatements, LogicalIntegerAssignment, - EmptySourceFile, ProgramReturn, ImplicitNoneTypeNever, - ImplicitNoneTypeAlways) + Carriagecontrol, Convert, Dispose, IOListLeadingComma, + AbbreviatedEditDescriptor, ProgramParentheses, PercentRefAndVal, + OmitFunctionDummies, CrayPointer, Hollerith, ArithmeticIF, Assign, + 
AssignedGOTO, Pause, OpenACC, OpenMP, CruftAfterAmpersand, ClassicCComments, + AdditionalFormats, BigIntLiterals, RealDoControls, + EquivalenceNumericWithCharacter, AdditionalIntrinsics, AnonymousParents, + OldLabelDoEndStatements, LogicalIntegerAssignment, EmptySourceFile, + ProgramReturn, ImplicitNoneTypeNever, ImplicitNoneTypeAlways) using LanguageFeatures = EnumSet; diff --git a/flang/include/flang/Common/Fortran.h b/flang/include/flang/Common/Fortran.h index df6b27c8ce3b1..5d5ab324e826e 100644 --- a/flang/include/flang/Common/Fortran.h +++ b/flang/include/flang/Common/Fortran.h @@ -52,6 +52,7 @@ ENUM_CLASS(IoSpecKind, Access, Action, Advance, Asynchronous, Blank, Decimal, Id, Iomsg, Iostat, Name, Named, Newunit, Nextrec, Nml, Number, Opened, Pad, Pending, Pos, Position, Read, Readwrite, Rec, Recl, Round, Sequential, Sign, Size, Status, Stream, Unformatted, Unit, Write, + Carriagecontrol, // nonstandard Convert, // nonstandard Dispose, // nonstandard ) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 7f9984bc50481..166e573b5cec3 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -2549,7 +2549,8 @@ using FileNameExpr = ScalarDefaultCharExpr; // POSITION = scalar-default-char-expr | RECL = scalar-int-expr | // ROUND = scalar-default-char-expr | SIGN = scalar-default-char-expr | // STATUS = scalar-default-char-expr -// @ | CONVERT = scalar-default-char-variable +// @ | CARRIAGECONTROL = scalar-default-char-variable +// | CONVERT = scalar-default-char-variable // | DISPOSE = scalar-default-char-variable WRAPPER_CLASS(StatusExpr, ScalarDefaultCharExpr); WRAPPER_CLASS(ErrLabel, Label); @@ -2559,7 +2560,7 @@ struct ConnectSpec { struct CharExpr { ENUM_CLASS(Kind, Access, Action, Asynchronous, Blank, Decimal, Delim, Encoding, Form, Pad, Position, Round, Sign, - /* extensions: */ Convert, Dispose) + /* extensions: */ Carriagecontrol, Convert, Dispose) 
TUPLE_CLASS_BOILERPLATE(CharExpr); std::tuple t; }; @@ -2767,7 +2768,8 @@ WRAPPER_CLASS(FlushStmt, std::list); // STATUS = scalar-default-char-variable | // UNFORMATTED = scalar-default-char-variable | // WRITE = scalar-default-char-variable -// @ | CONVERT = scalar-default-char-variable +// @ | CARRIAGECONTROL = scalar-default-char-variable +// | CONVERT = scalar-default-char-variable // | DISPOSE = scalar-default-char-variable struct InquireSpec { UNION_CLASS_BOILERPLATE(InquireSpec); @@ -2775,7 +2777,7 @@ struct InquireSpec { ENUM_CLASS(Kind, Access, Action, Asynchronous, Blank, Decimal, Delim, Direct, Encoding, Form, Formatted, Iomsg, Name, Pad, Position, Read, Readwrite, Round, Sequential, Sign, Stream, Status, Unformatted, Write, - /* extensions: */ Convert, Dispose) + /* extensions: */ Carriagecontrol, Convert, Dispose) TUPLE_CLASS_BOILERPLATE(CharVar); std::tuple t; }; diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp index 50dc5c80df6ac..3f79b79e32ee9 100644 --- a/flang/lib/Lower/IO.cpp +++ b/flang/lib/Lower/IO.cpp @@ -60,12 +60,12 @@ static constexpr std::tuple< mkIOKey(OutputComplex64), mkIOKey(OutputComplex32), mkIOKey(OutputAscii), mkIOKey(InputAscii), mkIOKey(OutputLogical), mkIOKey(InputLogical), mkIOKey(SetAccess), mkIOKey(SetAction), mkIOKey(SetAsynchronous), - mkIOKey(SetEncoding), mkIOKey(SetForm), mkIOKey(SetPosition), - mkIOKey(SetRecl), mkIOKey(SetStatus), mkIOKey(SetFile), mkIOKey(GetNewUnit), - mkIOKey(GetSize), mkIOKey(GetIoLength), mkIOKey(GetIoMsg), - mkIOKey(InquireCharacter), mkIOKey(InquireLogical), - mkIOKey(InquirePendingId), mkIOKey(InquireInteger64), - mkIOKey(EndIoStatement)> + mkIOKey(SetCarriagecontrol), mkIOKey(SetEncoding), mkIOKey(SetForm), + mkIOKey(SetPosition), mkIOKey(SetRecl), mkIOKey(SetStatus), + mkIOKey(SetFile), mkIOKey(GetNewUnit), mkIOKey(GetSize), + mkIOKey(GetIoLength), mkIOKey(GetIoMsg), mkIOKey(InquireCharacter), + mkIOKey(InquireLogical), mkIOKey(InquirePendingId), + mkIOKey(InquireInteger64), 
mkIOKey(EndIoStatement)> newIOTable; } // namespace Fortran::lower @@ -599,6 +599,9 @@ mlir::Value genIOOption( case Fortran::parser::ConnectSpec::CharExpr::Kind::Sign: ioFunc = getIORuntimeFunc(loc, builder); break; + case Fortran::parser::ConnectSpec::CharExpr::Kind::Carriagecontrol: + ioFunc = getIORuntimeFunc(loc, builder); + break; case Fortran::parser::ConnectSpec::CharExpr::Kind::Convert: llvm_unreachable("CONVERT not part of the runtime::io interface"); case Fortran::parser::ConnectSpec::CharExpr::Kind::Dispose: diff --git a/flang/lib/Parser/io-parsers.cpp b/flang/lib/Parser/io-parsers.cpp index 30f6db172c749..3615501a98edc 100644 --- a/flang/lib/Parser/io-parsers.cpp +++ b/flang/lib/Parser/io-parsers.cpp @@ -54,8 +54,9 @@ constexpr auto fileNameExpr{scalarDefaultCharExpr}; // POSITION = scalar-default-char-expr | RECL = scalar-int-expr | // ROUND = scalar-default-char-expr | SIGN = scalar-default-char-expr | // STATUS = scalar-default-char-expr -// @ | CONVERT = scalar-default-char-variable -// @ | DISPOSE = scalar-default-char-variable +// @ | CARRIAGECONTROL = scalar-default-char-variable +// | CONVERT = scalar-default-char-variable +// | DISPOSE = scalar-default-char-variable constexpr auto statusExpr{construct(scalarDefaultCharExpr)}; constexpr auto errLabel{construct(label)}; @@ -107,6 +108,10 @@ TYPE_PARSER(first(construct(maybe("UNIT ="_tok) >> fileUnitNumber), "SIGN =" >> pure(ConnectSpec::CharExpr::Kind::Sign), scalarDefaultCharExpr)), construct("STATUS =" >> statusExpr), + extension(construct( + construct("CARRIAGECONTROL =" >> + pure(ConnectSpec::CharExpr::Kind::Carriagecontrol), + scalarDefaultCharExpr))), extension( construct(construct( "CONVERT =" >> pure(ConnectSpec::CharExpr::Kind::Convert), @@ -357,7 +362,8 @@ TYPE_CONTEXT_PARSER("FLUSH statement"_en_US, // STREAM = scalar-default-char-variable | // STATUS = scalar-default-char-variable | // WRITE = scalar-default-char-variable -// @ | CONVERT = scalar-default-char-variable +// @ | 
CARRIAGECONTROL = scalar-default-char-variable +// | CONVERT = scalar-default-char-variable // | DISPOSE = scalar-default-char-variable TYPE_PARSER(first(construct(maybe("UNIT ="_tok) >> fileUnitNumber), construct("FILE =" >> fileNameExpr), @@ -475,6 +481,11 @@ TYPE_PARSER(first(construct(maybe("UNIT ="_tok) >> fileUnitNumber), construct("WRITE =" >> construct(pure(InquireSpec::CharVar::Kind::Write), scalarDefaultCharVariable)), + extension( + construct("CARRIAGECONTROL =" >> + construct( + pure(InquireSpec::CharVar::Kind::Carriagecontrol), + scalarDefaultCharVariable))), extension(construct( "CONVERT =" >> construct( pure(InquireSpec::CharVar::Kind::Convert), diff --git a/flang/lib/Semantics/check-io.cpp b/flang/lib/Semantics/check-io.cpp index d00f56c38042d..26702f6c48bf9 100644 --- a/flang/lib/Semantics/check-io.cpp +++ b/flang/lib/Semantics/check-io.cpp @@ -135,6 +135,9 @@ void IoChecker::Enter(const parser::ConnectSpec::CharExpr &spec) { case ParseKind::Sign: specKind = IoSpecKind::Sign; break; + case ParseKind::Carriagecontrol: + specKind = IoSpecKind::Carriagecontrol; + break; case ParseKind::Convert: specKind = IoSpecKind::Convert; break; @@ -152,6 +155,13 @@ void IoChecker::Enter(const parser::ConnectSpec::CharExpr &spec) { flags_.set(Flag::AccessStream, s == "STREAM"); } CheckStringValue(specKind, *charConst, parser::FindSourceLocation(spec)); + if (specKind == IoSpecKind::Carriagecontrol && + (s == "FORTRAN" || s == "NONE")) { + context_.Say(parser::FindSourceLocation(spec), + "Unimplemented %s value '%s'"_err_en_US, + parser::ToUpperCaseLetters(common::EnumToString(specKind)), + *charConst); + } } } @@ -378,6 +388,9 @@ void IoChecker::Enter(const parser::InquireSpec::CharVar &spec) { case ParseKind::Write: specKind = IoSpecKind::Write; break; + case ParseKind::Carriagecontrol: + specKind = IoSpecKind::Carriagecontrol; + break; case ParseKind::Convert: specKind = IoSpecKind::Convert; break; @@ -821,6 +834,7 @@ void IoChecker::CheckStringValue(IoSpecKind 
specKind, const std::string &value, {IoSpecKind::Status, // Open values; Close values are {"DELETE", "KEEP"}. {"NEW", "OLD", "REPLACE", "SCRATCH", "UNKNOWN"}}, + {IoSpecKind::Carriagecontrol, {"LIST", "FORTRAN", "NONE"}}, {IoSpecKind::Convert, {"BIG_ENDIAN", "LITTLE_ENDIAN", "NATIVE"}}, {IoSpecKind::Dispose, {"DELETE", "KEEP"}}, }; diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 30f343773f90d..18c3f8241f08f 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -655,6 +655,31 @@ bool IONAME(SetAsynchronous)( } } +bool IONAME(SetCarriagecontrol)( + Cookie cookie, const char *keyword, std::size_t length) { + IoStatementState &io{*cookie}; + auto *open{io.get_if()}; + if (!open) { + io.GetIoErrorHandler().Crash( + "SetCarriageControl() called when not in an OPEN statement"); + } + static const char *keywords[]{"LIST", "FORTRAN", "NONE", nullptr}; + switch (IdentifyValue(keyword, length, keywords)) { + case 0: + return true; + case 1: + case 2: + open->SignalError(IostatErrorInKeyword, + "Unimplemented CARRIAGECONTROL='%.*s'", static_cast(length), + keyword); + return false; + default: + open->SignalError(IostatErrorInKeyword, "Invalid CARRIAGECONTROL='%.*s'", + static_cast(length), keyword); + return false; + } +} + bool IONAME(SetConvert)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; @@ -708,7 +733,7 @@ bool IONAME(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { auto *open{io.get_if()}; if (!open) { io.GetIoErrorHandler().Crash( - "SetEncoding() called when not in an OPEN statement"); + "SetForm() called when not in an OPEN statement"); } static const char *keywords[]{"FORMATTED", "UNFORMATTED", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { diff --git a/flang/runtime/io-api.h b/flang/runtime/io-api.h index a38152d6ec1c1..369013fee8bc1 100644 --- a/flang/runtime/io-api.h +++ b/flang/runtime/io-api.h @@ -260,6 +260,8 @@ bool IONAME(SetAccess)(Cookie, 
const char *, std::size_t); bool IONAME(SetAction)(Cookie, const char *, std::size_t); // ASYNCHRONOUS=YES, NO bool IONAME(SetAsynchronous)(Cookie, const char *, std::size_t); +// CARRIAGECONTROL=LIST, FORTRAN, NONE +bool IONAME(SetCarriagecontrol)(Cookie, const char *, std::size_t); // CONVERT=NATIVE, LITTLE_ENDIAN, BIG_ENDIAN, or SWAP bool IONAME(SetConvert)(Cookie, const char *, std::size_t); // ENCODING=UTF-8, DEFAULT diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 8300b1ea3c27b..9bf0284358b96 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -779,6 +779,9 @@ bool InquireUnitState::Inquire( : unit().modes.editingFlags & blankZero ? "ZERO" : "NULL"; break; + case HashInquiryKeyword("CARRIAGECONTROL"): + str = "LIST"; + break; case HashInquiryKeyword("CONVERT"): str = unit().swapEndianness() ? "SWAP" : "NATIVE"; break; @@ -976,6 +979,7 @@ bool InquireNoUnitState::Inquire( case HashInquiryKeyword("ACTION"): case HashInquiryKeyword("ASYNCHRONOUS"): case HashInquiryKeyword("BLANK"): + case HashInquiryKeyword("CARRIAGECONTROL"): case HashInquiryKeyword("CONVERT"): case HashInquiryKeyword("DECIMAL"): case HashInquiryKeyword("DELIM"): @@ -1061,6 +1065,7 @@ bool InquireUnconnectedFileState::Inquire( case HashInquiryKeyword("ACTION"): case HashInquiryKeyword("ASYNCHRONOUS"): case HashInquiryKeyword("BLANK"): + case HashInquiryKeyword("CARRIAGECONTROL"): case HashInquiryKeyword("CONVERT"): case HashInquiryKeyword("DECIMAL"): case HashInquiryKeyword("DELIM"): diff --git a/flang/test/Semantics/io01.f90 b/flang/test/Semantics/io01.f90 index 9828d4afe8921..17b68e407407e 100644 --- a/flang/test/Semantics/io01.f90 +++ b/flang/test/Semantics/io01.f90 @@ -62,6 +62,7 @@ open(81, convert=convert_(2), dispose=dispose_(2)) open(access='STREAM', 90) ! nonstandard + open (unit=91, file='xfile', carriagecontrol='list') ! 
nonstandard !ERROR: OPEN statement must have a UNIT or NEWUNIT specifier !ERROR: If ACCESS='DIRECT' appears, RECL must also appear @@ -127,4 +128,10 @@ !ERROR: If NEWUNIT appears, FILE or STATUS='SCRATCH' must also appear open(newunit=nn, status='old') + + !ERROR: Unimplemented CARRIAGECONTROL value 'fortran' + open (unit=116, file='xfile', carriagecontrol='fortran') ! nonstandard + + !ERROR: Invalid CARRIAGECONTROL value 'nonsense' + open (unit=116, file='xfile', carriagecontrol='nonsense') ! nonstandard end diff --git a/flang/test/Semantics/io05.f90 b/flang/test/Semantics/io05.f90 index ed6b77f7d4ad9..666b200ad9a3c 100644 --- a/flang/test/Semantics/io05.f90 +++ b/flang/test/Semantics/io05.f90 @@ -25,6 +25,7 @@ inquire(pending=v(5), file='abc') inquire(10, id=id, pending=v(5)) inquire(10, id=const_id, pending=v(5)) + inquire(10, carriagecontrol=c(1)) ! nonstandard ! using variable 'cv' multiple times seems to be allowed inquire(file='abc', & From 62dbb7e54c65386f3cd73ef761a22b73532158f0 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Wed, 2 Sep 2020 14:53:06 -0400 Subject: [PATCH 041/465] Revert "[HIP] Change default --gpu-max-threads-per-block value to 1024" Temporarily revert commit 04abbb3a78186aa92809866b43217c32cba90b71 due to regressions in some HIP apps due backend issues revealed by this change. Will re-commit it when backend issues are fixed. 
--- clang/include/clang/Basic/LangOptions.def | 2 +- clang/lib/CodeGen/TargetInfo.cpp | 8 ++------ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu | 2 +- clang/test/CodeGenCUDA/kernel-amdgcn.cu | 2 +- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 3132e76354189..9846809763f83 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -240,7 +240,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr function LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions") LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP") -LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP") +LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP") LANGOPT(SYCL , 1, 0, "SYCL") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index d6efd54220878..e1ab61f10585d 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9066,13 +9066,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( assert(Max == 0 && "Max must be zero"); } else if (IsOpenCLKernel || IsHIPKernel) { // By default, restrict the maximum size to a value specified by - // --gpu-max-threads-per-block=n or its default value for HIP. - const unsigned OpenCLDefaultMaxWorkGroupSize = 256; - const unsigned DefaultMaxWorkGroupSize = - IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize - : M.getLangOpts().GPUMaxThreadsPerBlock; + // --gpu-max-threads-per-block=n or its default value. 
std::string AttrVal = - std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize); + std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock); F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu index 7a9fd2527272a..5415bddffc899 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -39,7 +39,7 @@ __global__ void num_vgpr_64() { // NAMD-NOT: "amdgpu-num-vgpr" // NAMD-NOT: "amdgpu-num-sgpr" -// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true" +// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true" // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64" // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2" diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu index 6066469f76470..135d3030480c6 100644 --- a/clang/test/CodeGenCUDA/kernel-amdgcn.cu +++ b/clang/test/CodeGenCUDA/kernel-amdgcn.cu @@ -39,4 +39,4 @@ int main() { launch((void*)D.Empty()); return 0; } -// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" +// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256" From a27398a8151dc553dae85ede12f966e5b981b64b Mon Sep 17 00:00:00 2001 From: Ahsan Saghir Date: Wed, 2 Sep 2020 14:13:23 -0500 Subject: [PATCH 042/465] [PowerPC] Update MemorySanitizer test to cater for number of CPUs > 1024 MemorySanitizer test fails on systems with more than 1024 CPUs. This patch updates the test to make it work for machines that have more than 1024 CPUs. 
This helps to fix errors on the PowerPC sanitizer bot. Reviewed By: #powerpc, nemanjai Differential Revision: https://reviews.llvm.org/D87053 --- compiler-rt/lib/msan/tests/msan_test.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index 53b9a3e563e94..4c98bb4861f20 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -3229,9 +3229,19 @@ TEST(MemorySanitizer, dlopenFailed) { #if !defined(__FreeBSD__) && !defined(__NetBSD__) TEST(MemorySanitizer, sched_getaffinity) { cpu_set_t mask; - int res = sched_getaffinity(getpid(), sizeof(mask), &mask); - ASSERT_EQ(0, res); - EXPECT_NOT_POISONED(mask); + if (sched_getaffinity(getpid(), sizeof(mask), &mask) == 0) + EXPECT_NOT_POISONED(mask); + else { + // The call to sched_getaffinity() may have failed because the Affinity + // mask is too small for the number of CPUs on the system (i.e. the + // system has more than 1024 CPUs). Allocate a mask large enough for + // twice as many CPUs. 
+ cpu_set_t *DynAffinity; + DynAffinity = CPU_ALLOC(2048); + int res = sched_getaffinity(getpid(), CPU_ALLOC_SIZE(2048), DynAffinity); + ASSERT_EQ(0, res); + EXPECT_NOT_POISONED(*DynAffinity); + } } #endif From 099c089d4b4117fd654aa6e4dd544d7680fa80b9 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 2 Sep 2020 16:14:58 +0100 Subject: [PATCH 043/465] [APInt] New member function setBitVal Differential Revision: https://reviews.llvm.org/D87033 --- llvm/include/llvm/ADT/APInt.h | 8 ++++++++ .../lib/CodeGen/SelectionDAG/TargetLowering.cpp | 8 ++------ llvm/lib/MCA/HardwareUnits/RegisterFile.cpp | 17 ++++------------- llvm/lib/Support/APInt.cpp | 11 +++-------- 4 files changed, 17 insertions(+), 27 deletions(-) diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index e3032a19f111f..5e4206732f4df 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -1450,6 +1450,14 @@ class LLVM_NODISCARD APInt { setBit(BitWidth - 1); } + /// Set a given bit to a given value. + void setBitVal(unsigned BitPosition, bool BitValue) { + if (BitValue) + setBit(BitPosition); + else + clearBit(BitPosition); + } + /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls /// setBits when \p loBit < \p hiBit. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 60c0c20ffacdd..a7816b8616e65 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2603,13 +2603,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero, TLO, Depth + 1)) return true; - KnownUndef.clearBit(Idx); - if (Scl.isUndef()) - KnownUndef.setBit(Idx); + KnownUndef.setBitVal(Idx, Scl.isUndef()); - KnownZero.clearBit(Idx); - if (isNullConstant(Scl) || isNullFPConstant(Scl)) - KnownZero.setBit(Idx); + KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl)); break; } diff --git a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp index 7ea5506f11d6a..11a24a6889f14 100644 --- a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp +++ b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp @@ -196,15 +196,9 @@ void RegisterFile::addRegisterWrite(WriteRef Write, // Update zero registers. MCPhysReg ZeroRegisterID = WS.clearsSuperRegisters() ? RegID : WS.getRegisterID(); - if (IsWriteZero) { - ZeroRegisters.setBit(ZeroRegisterID); - for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) - ZeroRegisters.setBit(*I); - } else { - ZeroRegisters.clearBit(ZeroRegisterID); - for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) - ZeroRegisters.clearBit(*I); - } + ZeroRegisters.setBitVal(ZeroRegisterID, IsWriteZero); + for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I) + ZeroRegisters.setBitVal(*I, IsWriteZero); // If this is move has been eliminated, then the call to tryEliminateMove // should have already updated all the register mappings. 
@@ -233,10 +227,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write, RegisterMappings[*I].second.AliasRegID = 0U; } - if (IsWriteZero) - ZeroRegisters.setBit(*I); - else - ZeroRegisters.clearBit(*I); + ZeroRegisters.setBitVal(*I, IsWriteZero); } } diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index 9a6f93feaa29f..fc339de45af43 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -338,8 +338,7 @@ void APInt::flipAllBitsSlowCase() { /// Toggles a given bit to its opposite value. void APInt::flipBit(unsigned bitPosition) { assert(bitPosition < BitWidth && "Out of the bit-width range!"); - if ((*this)[bitPosition]) clearBit(bitPosition); - else setBit(bitPosition); + setBitVal(bitPosition, !(*this)[bitPosition]); } void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { @@ -393,12 +392,8 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // General case - set/clear individual bits in dst based on src. // TODO - there is scope for optimization here, but at the moment this code // path is barely used so prefer readability over performance. - for (unsigned i = 0; i != subBitWidth; ++i) { - if (subBits[i]) - setBit(bitPosition + i); - else - clearBit(bitPosition + i); - } + for (unsigned i = 0; i != subBitWidth; ++i) + setBitVal(bitPosition + i, subBits[i]); } void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) { From 27714075848e7f05a297317ad28ad2570d8e5a43 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Wed, 2 Sep 2020 15:30:19 -0500 Subject: [PATCH 044/465] [PowerPC] Do not legalize vector FDIV without VSX Quite a while ago, we legalized these nodes as we added custom handling for reciprocal estimates in the back end. We have since moved to target-independent combines but neglected to turn off legalization. As a result, we can now get selection failures on non-VSX subtargets as evidenced in the listed PR. 
Fixes: https://bugs.llvm.org/show_bug.cgi?id=47373 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 8 +- llvm/test/CodeGen/PowerPC/pr47373.ll | 180 ++++++++++++++++++++ 2 files changed, 181 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr47373.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 4ffb35dda4fa1..4fe29f7f29944 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -860,7 +860,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); - if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { + if (Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } @@ -1234,12 +1234,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::SELECT_CC); } - // Use reciprocal estimates. 
- if (TM.Options.UnsafeFPMath) { - setTargetDAGCombine(ISD::FDIV); - setTargetDAGCombine(ISD::FSQRT); - } - if (Subtarget.hasP9Altivec()) { setTargetDAGCombine(ISD::ABS); setTargetDAGCombine(ISD::VSELECT); diff --git a/llvm/test/CodeGen/PowerPC/pr47373.ll b/llvm/test/CodeGen/PowerPC/pr47373.ll new file mode 100644 index 0000000000000..559f4f9a8b4ae --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr47373.ll @@ -0,0 +1,180 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 \ +; RUN: -mcpu=ppc64 -ppc-asm-full-reg-names < %s | FileCheck %s +@a = local_unnamed_addr global float* null, align 8 + +; Function Attrs: nounwind +define void @d() local_unnamed_addr #0 { +; CHECK-LABEL: d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -208(r1) +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: std r29, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: std r30, 192(r1) # 8-byte Folded Spill +; CHECK-NEXT: ld r29, 0(r3) +; CHECK-NEXT: bl c +; CHECK-NEXT: nop +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl b +; CHECK-NEXT: nop +; CHECK-NEXT: cmpwi r30, 1 +; CHECK-NEXT: blt cr0, .LBB0_9 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: cmplwi r30, 4 +; CHECK-NEXT: clrldi r4, r30, 32 +; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: blt cr0, .LBB0_7 +; CHECK-NEXT: # %bb.2: # %vector.memcheck +; CHECK-NEXT: rldic r6, r30, 2, 30 +; CHECK-NEXT: add r7, r3, r6 +; CHECK-NEXT: cmpld r29, r7 +; CHECK-NEXT: add r6, r29, r6 +; CHECK-NEXT: bc 4, lt, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %vector.memcheck +; CHECK-NEXT: cmpld r3, r6 +; CHECK-NEXT: bc 12, lt, .LBB0_7 +; CHECK-NEXT: .LBB0_4: # %vector.ph +; CHECK-NEXT: rlwinm r5, r4, 0, 0, 29 +; CHECK-NEXT: li r7, 15 +; CHECK-NEXT: addi r6, r5, -4 +; CHECK-NEXT: addi r8, r1, 144 +; CHECK-NEXT: rldicl r6, r6, 62, 2 +; CHECK-NEXT: addi r9, r1, 128 +; CHECK-NEXT: addi r6, r6, 1 +; 
CHECK-NEXT: addi r10, r1, 160 +; CHECK-NEXT: mtctr r6 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: addi r11, r1, 112 +; CHECK-NEXT: .LBB0_5: # %vector.body +; CHECK-NEXT: # +; CHECK-NEXT: add r12, r3, r6 +; CHECK-NEXT: lvx v3, r3, r6 +; CHECK-NEXT: lvx v5, r12, r7 +; CHECK-NEXT: add r12, r29, r6 +; CHECK-NEXT: lvsl v2, r3, r6 +; CHECK-NEXT: vperm v2, v3, v5, v2 +; CHECK-NEXT: lvx v3, r29, r6 +; CHECK-NEXT: lvx v5, r12, r7 +; CHECK-NEXT: lvsl v4, r29, r6 +; CHECK-NEXT: stvx v2, 0, r8 +; CHECK-NEXT: vperm v2, v3, v5, v4 +; CHECK-NEXT: stvx v2, 0, r9 +; CHECK-NEXT: lfs f0, 156(r1) +; CHECK-NEXT: lfs f1, 140(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: lfs f1, 136(r1) +; CHECK-NEXT: stfs f0, 172(r1) +; CHECK-NEXT: lfs f0, 152(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: lfs f1, 132(r1) +; CHECK-NEXT: stfs f0, 168(r1) +; CHECK-NEXT: lfs f0, 148(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: lfs f1, 128(r1) +; CHECK-NEXT: stfs f0, 164(r1) +; CHECK-NEXT: lfs f0, 144(r1) +; CHECK-NEXT: fdivs f0, f1, f0 +; CHECK-NEXT: stfs f0, 160(r1) +; CHECK-NEXT: lvx v2, 0, r10 +; CHECK-NEXT: stvx v2, 0, r11 +; CHECK-NEXT: ld r0, 112(r1) +; CHECK-NEXT: stdx r0, r29, r6 +; CHECK-NEXT: addi r6, r6, 16 +; CHECK-NEXT: ld r0, 120(r1) +; CHECK-NEXT: std r0, 8(r12) +; CHECK-NEXT: bdnz .LBB0_5 +; CHECK-NEXT: # %bb.6: # %middle.block +; CHECK-NEXT: cmpld r5, r4 +; CHECK-NEXT: beq cr0, .LBB0_9 +; CHECK-NEXT: .LBB0_7: # %for.body.preheader18 +; CHECK-NEXT: sldi r6, r5, 2 +; CHECK-NEXT: sub r5, r4, r5 +; CHECK-NEXT: addi r6, r6, -4 +; CHECK-NEXT: add r3, r3, r6 +; CHECK-NEXT: add r4, r29, r6 +; CHECK-NEXT: mtctr r5 +; CHECK-NEXT: .LBB0_8: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: lfsu f0, 4(r4) +; CHECK-NEXT: lfsu f1, 4(r3) +; CHECK-NEXT: fdivs f0, f0, f1 +; CHECK-NEXT: stfs f0, 0(r4) +; CHECK-NEXT: bdnz .LBB0_8 +; CHECK-NEXT: .LBB0_9: # %for.end +; CHECK-NEXT: ld r30, 192(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 184(r1) # 8-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 208 +; 
CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load float*, float** @a, align 8 + %call = call signext i32 bitcast (i32 (...)* @c to i32 ()*)() #2 + %call1 = call float* bitcast (float* (...)* @b to float* ()*)() #2 + %cmp11 = icmp sgt i32 %call, 0 + br i1 %cmp11, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %call to i64 + %min.iters.check = icmp ult i32 %call, 4 + br i1 %min.iters.check, label %for.body.preheader18, label %vector.memcheck + +vector.memcheck: ; preds = %for.body.preheader + %scevgep = getelementptr float, float* %0, i64 %wide.trip.count + %scevgep15 = getelementptr float, float* %call1, i64 %wide.trip.count + %bound0 = icmp ult float* %0, %scevgep15 + %bound1 = icmp ult float* %call1, %scevgep + %found.conflict = and i1 %bound0, %bound1 + br i1 %found.conflict, label %for.body.preheader18, label %vector.ph + +vector.ph: ; preds = %vector.memcheck + %n.vec = and i64 %wide.trip.count, 4294967292 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %1 = getelementptr inbounds float, float* %call1, i64 %index + %2 = bitcast float* %1 to <4 x float>* + %wide.load = load <4 x float>, <4 x float>* %2, align 4 + %3 = getelementptr inbounds float, float* %0, i64 %index + %4 = bitcast float* %3 to <4 x float>* + %wide.load17 = load <4 x float>, <4 x float>* %4, align 4 + %5 = fdiv reassoc nsz arcp afn <4 x float> %wide.load17, %wide.load + %6 = bitcast float* %3 to <4 x float>* + store <4 x float> %5, <4 x float>* %6, align 4 + %index.next = add i64 %index, 4 + %7 = icmp eq i64 %index.next, %n.vec + br i1 %7, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i64 %n.vec, %wide.trip.count + br i1 %cmp.n, label %for.end, label %for.body.preheader18 + +for.body.preheader18: ; preds = %middle.block, %vector.memcheck, 
%for.body.preheader + %indvars.iv.ph = phi i64 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] + br label %for.body + +for.body: ; preds = %for.body.preheader18, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader18 ] + %arrayidx = getelementptr inbounds float, float* %call1, i64 %indvars.iv + %8 = load float, float* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds float, float* %0, i64 %indvars.iv + %9 = load float, float* %arrayidx3, align 4 + %div = fdiv reassoc nsz arcp afn float %9, %8 + store float %div, float* %arrayidx3, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %middle.block, %entry + ret void +} + +declare signext i32 @c(...) local_unnamed_addr #1 + +declare float* @b(...) local_unnamed_addr #1 + +attributes #0 = { nounwind } From 65f20ea1133b3111a982c76eea74a609fa083184 Mon Sep 17 00:00:00 2001 From: Diego Caballero Date: Wed, 2 Sep 2020 22:26:57 +0300 Subject: [PATCH 045/465] [mlir][Affine] Fix AffineLoopInvariantCodeMotion Make sure that memory ops that are defined inside the loop are registered as such in 'defineOp'. In the test provided, the 'mulf' op was hoisted outside the loop nest even when its 'affine.load' operand was not. 
Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D86982 --- .../AffineLoopInvariantCodeMotion.cpp | 8 ++- .../affine-loop-invariant-code-motion.mlir | 65 ++++++++++++++++++- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp index 364168ce6e2ab..df8d875292148 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp @@ -84,6 +84,11 @@ bool isOpLoopInvariant(Operation &op, Value indVar, // TODO: Support DMA ops. return false; } else if (!isa(op)) { + // Register op in the set of ops defined inside the loop. This set is used + // to prevent hoisting ops that depend on other ops defined inside the loop + // which are themselves not being hoisted. + definedOps.insert(&op); + if (isMemRefDereferencingOp(op)) { Value memref = isa(op) ? cast(op).getMemRef() @@ -111,9 +116,6 @@ bool isOpLoopInvariant(Operation &op, Value indVar, } } - // Insert this op in the defined ops list. - definedOps.insert(&op); - if (op.getNumOperands() == 0 && !isa(op)) { LLVM_DEBUG(llvm::dbgs() << "\nNon-constant op with 0 operands\n"); return false; diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir index 4c9c0dbbf774b..8a2ed32757e44 100644 --- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir +++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir @@ -22,6 +22,8 @@ func @nested_loops_both_having_invariant_code() { return } +// ----- + // The store-load forwarding can see through affine apply's since it relies on // dependence information. 
// CHECK-LABEL: func @store_affine_apply @@ -36,12 +38,14 @@ func @store_affine_apply() -> memref<10xf32> { // CHECK: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %0 = alloc() : memref<10xf32> // CHECK-NEXT: affine.for %arg0 = 0 to 10 { -// CHECK-NEXT: %1 = affine.apply #map3(%arg0) +// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%arg0) // CHECK-NEXT: affine.store %cst, %0[%1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return %0 : memref<10xf32> } +// ----- + func @nested_loops_code_invariant_to_both() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -61,6 +65,8 @@ func @nested_loops_code_invariant_to_both() { return } +// ----- + func @single_loop_nothing_invariant() { %m1 = alloc() : memref<10xf32> %m2 = alloc() : memref<10xf32> @@ -82,6 +88,8 @@ func @single_loop_nothing_invariant() { return } +// ----- + func @invariant_code_inside_affine_if() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -98,7 +106,7 @@ func @invariant_code_inside_affine_if() { // CHECK: %0 = alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { - // CHECK-NEXT: %1 = affine.apply #map3(%arg0) + // CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%arg0) // CHECK-NEXT: affine.if #set0(%arg0, %1) { // CHECK-NEXT: %2 = addf %cst, %cst : f32 // CHECK-NEXT: affine.store %2, %0[%arg0] : memref<10xf32> @@ -108,6 +116,7 @@ func @invariant_code_inside_affine_if() { return } +// ----- func @dependent_stores() { %m = alloc() : memref<10xf32> @@ -137,6 +146,8 @@ func @dependent_stores() { return } +// ----- + func @independent_stores() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -165,6 +176,8 @@ func @independent_stores() { return } +// ----- + func @load_dependent_store() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -192,6 +205,8 @@ func @load_dependent_store() { return } +// ----- + func @load_after_load() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -219,6 
+234,8 @@ func @load_after_load() { return } +// ----- + func @invariant_affine_if() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -244,6 +261,8 @@ func @invariant_affine_if() { return } +// ----- + func @invariant_affine_if2() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -271,6 +290,8 @@ func @invariant_affine_if2() { return } +// ----- + func @invariant_affine_nested_if() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -303,6 +324,8 @@ func @invariant_affine_nested_if() { return } +// ----- + func @invariant_affine_nested_if_else() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -339,6 +362,8 @@ func @invariant_affine_nested_if_else() { return } +// ----- + func @invariant_affine_nested_if_else2() { %m = alloc() : memref<10xf32> %m2 = alloc() : memref<10xf32> @@ -375,6 +400,7 @@ func @invariant_affine_nested_if_else2() { return } +// ----- func @invariant_affine_nested_if2() { %m = alloc() : memref<10xf32> @@ -406,6 +432,8 @@ func @invariant_affine_nested_if2() { return } +// ----- + func @invariant_affine_for_inside_affine_if() { %m = alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 @@ -438,6 +466,7 @@ func @invariant_affine_for_inside_affine_if() { return } +// ----- func @invariant_constant_and_load() { %m = alloc() : memref<100xf32> @@ -459,6 +488,7 @@ func @invariant_constant_and_load() { return } +// ----- func @nested_load_store_same_memref() { %m = alloc() : memref<10xf32> @@ -483,6 +513,7 @@ func @nested_load_store_same_memref() { return } +// ----- func @nested_load_store_same_memref2() { %m = alloc() : memref<10xf32> @@ -505,3 +536,33 @@ func @nested_load_store_same_memref2() { return } + +// ----- + +// CHECK-LABEL: func @do_not_hoist_dependent_side_effect_free_op +func @do_not_hoist_dependent_side_effect_free_op(%arg0: memref<10x512xf32>) { + %0 = alloca() : memref<1xf32> + %cst = constant 8.0 : f32 + affine.for %i = 0 to 512 { + affine.for %j = 0 to 10 { + %5 = affine.load %arg0[%i, %j] : 
memref<10x512xf32> + %6 = affine.load %0[0] : memref<1xf32> + %add = addf %5, %6 : f32 + affine.store %add, %0[0] : memref<1xf32> + } + %3 = affine.load %0[0] : memref<1xf32> + %4 = mulf %3, %cst : f32 // It shouldn't be hoisted. + } + return +} + +// CHECK: affine.for +// CHECK-NEXT: affine.for +// CHECK-NEXT: affine.load +// CHECK-NEXT: affine.load +// CHECK-NEXT: addf +// CHECK-NEXT: affine.store +// CHECK-NEXT: } +// CHECK-NEXT: affine.load +// CHECK-NEXT: mulf +// CHECK-NEXT: } From 3b12e12d4b9efbdd28113da6db0f74b660257c83 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Wed, 2 Sep 2020 14:14:26 -0700 Subject: [PATCH 046/465] Revert "[clang] Add missing .def files to Clang's modulemap" This reverts commit e0e7eb2e2648aee83caf2ecfe2972ce2f653d306. [the commit this fixes up was reverted] --- clang/include/clang/module.modulemap | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap index 6290548b41f1c..13d4dbf9dc2e8 100644 --- a/clang/include/clang/module.modulemap +++ b/clang/include/clang/module.modulemap @@ -52,10 +52,8 @@ module Clang_Basic { textual header "Basic/BuiltinsX86_64.def" textual header "Basic/BuiltinsXCore.def" textual header "Basic/CodeGenOptions.def" - textual header "Basic/CommentOptions.def" textual header "Basic/DiagnosticOptions.def" textual header "Basic/Features.def" - textual header "Basic/FileSystemOptions.def" textual header "Basic/FPOptions.def" textual header "Basic/MSP430Target.def" textual header "Basic/LangOptions.def" @@ -65,7 +63,6 @@ module Clang_Basic { textual header "Basic/OpenMPKinds.def" textual header "Basic/OperatorKinds.def" textual header "Basic/Sanitizers.def" - textual header "Basic/TargetOptions.def" textual header "Basic/TokenKinds.def" textual header "Basic/X86Target.def" @@ -110,35 +107,17 @@ module Clang_Frontend { umbrella "Frontend" textual header "Basic/LangStandards.def" - textual header 
"Frontend/DependencyOutputOptions.def" - textual header "Frontend/FrontendOptions.def" - textual header "Frontend/MigratorOptions.def" - textual header "Frontend/PreprocessorOutputOptions.def" module * { export * } } module Clang_FrontendTool { requires cplusplus umbrella "FrontendTool" module * { export * } } module Clang_Index { requires cplusplus umbrella "Index" module * { export * } } -module Clang_Lex { - requires cplusplus - umbrella "Lex" - textual header "Lex/HeaderSearchOptions.def" - textual header "Lex/PreprocessorOptions.def" - - module * { export * } -} +module Clang_Lex { requires cplusplus umbrella "Lex" module * { export * } } module Clang_Parse { requires cplusplus umbrella "Parse" module * { export * } } module Clang_Rewrite { requires cplusplus umbrella "Rewrite/Core" module * { export * } } module Clang_RewriteFrontend { requires cplusplus umbrella "Rewrite/Frontend" module * { export * } } -module Clang_Sema { - requires cplusplus - umbrella "Sema" - - textual header "Sema/CodeCompleteOptions.def" - - module * { export * } -} +module Clang_Sema { requires cplusplus umbrella "Sema" module * { export * } } module Clang_Serialization { requires cplusplus From 272742a92d2443893eb98a7b3460e243e34278f9 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Wed, 2 Sep 2020 13:48:44 -0700 Subject: [PATCH 047/465] Perform an extra consistency check when searching ModuleManager's cache for implicit modules. The ModuleManager's use of FileEntry nodes as the keys for its map of loaded modules is less than ideal. Uniqueness for FileEntry nodes is maintained by FileManager, which in turn uses inode numbers on hosts that support that. When coupled with the module cache's proclivity for turning over and deleting stale PCMs, this means entries for different module files can wind up reusing the same underlying inode. When this happens, subsequent accesses to the Modules map will disagree on the ModuleFile associated with a given file. 
In general, it is not sufficient to resolve this conundrum with a type like FileEntryRef that stores the name of the FileEntry node on first access because of path canonicalization issues. However, the paths constructed for implicit module builds are fully under Clang's control. We *can*, therefore, rely on their structure being consistent across operating systems and across subsequent accesses to the Modules map. To mitigate the effects of inode reuse, perform an extra name check when implicit modules are returned from the cache. This has the effect of forcing reused FileEntry nodes to stomp over existing-but-stale entries in the cache, which simulates a miss - exactly the desired behavior. rdar://48443680 Patch by Robert Widmann! Differential Revision: https://reviews.llvm.org/D86823 --- clang/lib/Serialization/ModuleManager.cpp | 37 ++++++++++++++++++----- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index a42ed2f3c179d..542e75e77c3a5 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -132,15 +132,38 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type, return Missing; } + // The ModuleManager's use of FileEntry nodes as the keys for its map of + // loaded modules is less than ideal. Uniqueness for FileEntry nodes is + // maintained by FileManager, which in turn uses inode numbers on hosts + // that support that. When coupled with the module cache's proclivity for + // turning over and deleting stale PCMs, this means entries for different + // module files can wind up reusing the same underlying inode. When this + // happens, subsequent accesses to the Modules map will disagree on the + // ModuleFile associated with a given file. 
In general, it is not sufficient + // to resolve this conundrum with a type like FileEntryRef that stores the + // name of the FileEntry node on first access because of path canonicalization + // issues. However, the paths constructed for implicit module builds are + // fully under Clang's control. We *can*, therefore, rely on their structure + // being consistent across operating systems and across subsequent accesses + // to the Modules map. + auto implicitModuleNamesMatch = [](ModuleKind Kind, const ModuleFile *MF, + const FileEntry *Entry) -> bool { + if (Kind != MK_ImplicitModule) + return true; + return Entry->getName() == MF->FileName; + }; + // Check whether we already loaded this module, before if (ModuleFile *ModuleEntry = Modules.lookup(Entry)) { - // Check the stored signature. - if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr)) - return OutOfDate; - - Module = ModuleEntry; - updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc); - return AlreadyLoaded; + if (implicitModuleNamesMatch(Type, ModuleEntry, Entry)) { + // Check the stored signature. + if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr)) + return OutOfDate; + + Module = ModuleEntry; + updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc); + return AlreadyLoaded; + } } // Allocate a new module. From 00d9907a7ac86c3aa49c8cbb21a97094b5887ea9 Mon Sep 17 00:00:00 2001 From: Kostya Kortchinsky Date: Fri, 28 Aug 2020 11:44:39 -0700 Subject: [PATCH 048/465] [scudo][standalone] Enable secondary cache release on Fuchsia I had left this as a TODO, but it turns out it wasn't complicated. By specifying `MAP_RESIZABLE`, it allows us to keep the VMO which we can then use for release purposes. `releasePagesToOS` also had to be called the "proper" way, as Fuchsia requires the `Offset` field to be correct. This has no impact on non-Fuchsia platforms. 
Differential Revision: https://reviews.llvm.org/D86800 --- compiler-rt/lib/scudo/standalone/combined.h | 2 +- compiler-rt/lib/scudo/standalone/secondary.h | 11 +++-------- .../lib/scudo/standalone/tests/combined_test.cpp | 4 +++- .../lib/scudo/standalone/tests/secondary_test.cpp | 6 ------ 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 6ca00c29ab732..465e581cf5134 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -306,7 +306,7 @@ class Allocator { void *Block = nullptr; uptr ClassId = 0; - uptr SecondaryBlockEnd; + uptr SecondaryBlockEnd = 0; if (LIKELY(PrimaryT::canAllocate(NeededSize))) { ClassId = SizeClassMap::getClassIdBySize(NeededSize); DCHECK_NE(ClassId, 0U); diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index b5bb53ddcf2d9..da435fd86adc8 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -75,11 +75,6 @@ template class MapAllocatorCache { public: - // Fuchsia doesn't allow releasing Secondary blocks yet. Note that 0 length - // arrays are an extension for some compilers. - // FIXME(kostyak): support (partially) the cache on Fuchsia. - static_assert(!SCUDO_FUCHSIA || EntriesArraySize == 0U, ""); - // Ensure the default maximum specified fits the array. 
static_assert(DefaultMaxEntriesCount <= EntriesArraySize, ""); @@ -392,9 +387,9 @@ void *MapAllocator::allocate(uptr Size, uptr AlignmentHint, } const uptr CommitSize = MapEnd - PageSize - CommitBase; - const uptr Ptr = - reinterpret_cast(map(reinterpret_cast(CommitBase), - CommitSize, "scudo:secondary", 0, &Data)); + const uptr Ptr = reinterpret_cast( + map(reinterpret_cast(CommitBase), CommitSize, "scudo:secondary", + MAP_RESIZABLE, &Data)); LargeBlock::Header *H = reinterpret_cast(Ptr); H->MapBase = MapBase; H->MapSize = MapEnd - MapBase; diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp index 9689c4265e06c..481158308c434 100644 --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -23,7 +24,8 @@ static bool Ready; static constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc; -static void disableDebuggerdMaybe() { +// Fuchsia complains that the function is not used. +UNUSED static void disableDebuggerdMaybe() { #if SCUDO_ANDROID // Disable the debuggerd signal handler on Android, without this we can end // up spending a significant amount of time creating tombstones. 
diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index 29efdb3060128..d9f2d2fcb95f1 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -56,18 +56,12 @@ template static void testSecondaryBasic(void) { TEST(ScudoSecondaryTest, SecondaryBasic) { testSecondaryBasic>(); -#if !SCUDO_FUCHSIA testSecondaryBasic>>(); testSecondaryBasic< scudo::MapAllocator>>(); -#endif } -#if SCUDO_FUCHSIA -using LargeAllocator = scudo::MapAllocator; -#else using LargeAllocator = scudo::MapAllocator>; -#endif // This exercises a variety of combinations of size and alignment for the // MapAllocator. The size computation done here mimic the ones done by the From de6caf871be79dc7549aebe4e4fb57d52f6ed202 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Tue, 1 Sep 2020 18:52:14 -0700 Subject: [PATCH 049/465] run in terminal --- .../tools/lldb-vscode/lldbvscode_testcase.py | 14 +- .../test/tools/lldb-vscode/vscode.py | 30 +++- .../tools/lldb-vscode/runInTerminal/Makefile | 3 + .../runInTerminal/TestVSCode_runInTerminal.py | 48 +++++ .../tools/lldb-vscode/runInTerminal/main.c | 11 ++ lldb/tools/lldb-vscode/JSONUtils.cpp | 40 +++++ lldb/tools/lldb-vscode/JSONUtils.h | 12 ++ lldb/tools/lldb-vscode/VSCode.cpp | 73 +++++++- lldb/tools/lldb-vscode/VSCode.h | 44 +++++ lldb/tools/lldb-vscode/lldb-vscode.cpp | 167 ++++++++++-------- lldb/tools/lldb-vscode/package.json | 5 + 11 files changed, 365 insertions(+), 82 deletions(-) create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/main.c diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py index 
fa5a9c0db1ebd..5710751ec34bf 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py @@ -282,7 +282,7 @@ def launch(self, program=None, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, debuggerRoot=None, launchCommands=None, - sourceMap=None, disconnectAutomatically=True): + sourceMap=None, disconnectAutomatically=True, runInTerminal=False): '''Sending launch request to vscode ''' @@ -316,10 +316,16 @@ def cleanup(): sourcePath=sourcePath, debuggerRoot=debuggerRoot, launchCommands=launchCommands, - sourceMap=sourceMap) + sourceMap=sourceMap, + runInTerminal=runInTerminal) if not (response and response['success']): self.assertTrue(response['success'], 'launch failed (%s)' % (response['message'])) + # We need to trigger a request_configurationDone after we've successfully + # attached a runInTerminal process to finish initialization. + if runInTerminal: + self.vscode.request_configurationDone() + def build_and_launch(self, program, args=None, cwd=None, env=None, stopOnEntry=False, disableASLR=True, @@ -327,7 +333,7 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, - debuggerRoot=None): + debuggerRoot=None, runInTerminal=False): '''Build the default Makefile target, create the VSCode debug adaptor, and launch the process. 
''' @@ -337,4 +343,4 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, self.launch(program, args, cwd, env, stopOnEntry, disableASLR, disableSTDIO, shellExpandArguments, trace, initCommands, preRunCommands, stopCommands, exitCommands, - terminateCommands, sourcePath, debuggerRoot) + terminateCommands, sourcePath, debuggerRoot, runInTerminal=runInTerminal) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index 6b1c1c961b545..834e33ef5c3da 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -300,12 +300,29 @@ def send_recv(self, command): self.send_packet(command) done = False while not done: - response = self.recv_packet(filter_type='response') - if response is None: + response_or_request = self.recv_packet(filter_type=['response', 'request']) + if response_or_request is None: desc = 'no response for "%s"' % (command['command']) raise ValueError(desc) - self.validate_response(command, response) - return response + if response_or_request['type'] == 'response': + self.validate_response(command, response_or_request) + return response_or_request + else: + if response_or_request['command'] == 'runInTerminal': + subprocess.Popen(response_or_request['arguments']['args'], + env=response_or_request['arguments']['env']) + self.send_packet({ + "type": "response", + "seq": -1, + "request_seq": response_or_request['seq'], + "success": True, + "command": "runInTerminal", + "body": {} + }, set_sequence=False) + else: + desc = 'unkonwn reverse request "%s"' % (response_or_request['command']) + raise ValueError(desc) + return None def wait_for_event(self, filter=None, timeout=None): @@ -599,7 +616,8 @@ def request_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, 
terminateCommands=None ,sourcePath=None, - debuggerRoot=None, launchCommands=None, sourceMap=None): + debuggerRoot=None, launchCommands=None, sourceMap=None, + runInTerminal=False): args_dict = { 'program': program } @@ -638,6 +656,8 @@ def request_launch(self, program, args=None, cwd=None, env=None, args_dict['launchCommands'] = launchCommands if sourceMap: args_dict['sourceMap'] = sourceMap + if runInTerminal: + args_dict['runInTerminal'] = runInTerminal command_dict = { 'command': 'launch', 'type': 'request', diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py new file mode 100644 index 0000000000000..b21871e9ac949 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py @@ -0,0 +1,48 @@ +""" +Test lldb-vscode runInTerminal reverse request +""" + + +import unittest2 +import vscode +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import lldbvscode_testcase +import time +import os + + +class TestVSCode_runInTerminal(lldbvscode_testcase.VSCodeTestCaseBase): + + mydir = TestBase.compute_mydir(__file__) + + @skipIfWindows + @skipIfRemote + def test_runInTerminal(self): + ''' + Tests the "runInTerminal" reverse request. It makes sure that the IDE can + launch the inferior with the correct environment variables and arguments. 
+ ''' + program = self.getBuildArtifact("a.out") + source = 'main.c' + self.build_and_launch(program, stopOnEntry=True, runInTerminal=True, args=["foobar"], env=["FOO=bar"]) + breakpoint_line = line_number(source, '// breakpoint') + + self.set_source_breakpoints(source, [breakpoint_line]) + self.continue_to_next_stop() + + # We verify we actually stopped inside the loop + counter = int(self.vscode.get_local_variable_value('counter')) + self.assertTrue(counter > 0) + + # We verify we were able to set the launch arguments + argc = int(self.vscode.get_local_variable_value('argc')) + self.assertEqual(argc, 2) + + argv1 = self.vscode.request_evaluate('argv[1]')['body']['result'] + self.assertIn('foobar', argv1) + + # We verify we were able to set the environment + env = self.vscode.request_evaluate('foo')['body']['result'] + self.assertIn('bar', env) diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c new file mode 100644 index 0000000000000..676bd830e657b --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c @@ -0,0 +1,11 @@ +#include +#include +#include + +int main(int argc, char *argv[]) { + const char *foo = getenv("FOO"); + for (int counter = 1;; counter++) { + sleep(1); // breakpoint + } + return 0; +} diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 36156ca2c42f9..044bfd13ec463 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -998,4 +998,44 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit) { return llvm::json::Value(std::move(object)); } +/// See +/// https://microsoft.github.io/debug-adapter-protocol/specification#Reverse_Requests_RunInTerminal +llvm::json::Object +CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request) { + llvm::json::Object reverse_request; + reverse_request.try_emplace("type", "request"); + reverse_request.try_emplace("command", 
"runInTerminal"); + + llvm::json::Object run_in_terminal_args; + // This indicates the IDE to open an embedded terminal, instead of opening the + // terminal in a new window. + run_in_terminal_args.try_emplace("kind", "integrated"); + + auto launch_request_arguments = launch_request.getObject("arguments"); + std::vector args = GetStrings(launch_request_arguments, "args"); + // The program path must be the first entry in the "args" field + args.insert(args.begin(), + GetString(launch_request_arguments, "program").str()); + run_in_terminal_args.try_emplace("args", args); + + const auto cwd = GetString(launch_request_arguments, "cwd"); + if (!cwd.empty()) + run_in_terminal_args.try_emplace("cwd", cwd); + + // We need to convert the input list of environments variables into a + // dictionary + std::vector envs = GetStrings(launch_request_arguments, "env"); + llvm::json::Object environment; + for (const std::string &env : envs) { + size_t index = env.find("="); + environment.try_emplace(env.substr(0, index), env.substr(index + 1)); + } + run_in_terminal_args.try_emplace("env", + llvm::json::Value(std::move(environment))); + + reverse_request.try_emplace( + "arguments", llvm::json::Value(std::move(run_in_terminal_args))); + return reverse_request; +} + } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h index df4428f390ba2..88cbef9e5fdd4 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.h +++ b/lldb/tools/lldb-vscode/JSONUtils.h @@ -443,6 +443,18 @@ llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference, llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit); +/// Create a runInTerminal reverse request object +/// +/// \param[in] launch_request +/// The original launch_request object whose fields are used to construct +/// the reverse request object. +/// +/// \return +/// A "runInTerminal" JSON object that follows the specification outlined by +/// Microsoft. 
+llvm::json::Object +CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request); + } // namespace lldb_vscode #endif diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index 537cae7868631..2022f8319534e 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -38,7 +38,8 @@ VSCode::VSCode() {"swift_catch", "Swift Catch", lldb::eLanguageTypeSwift}, {"swift_throw", "Swift Throw", lldb::eLanguageTypeSwift}}), focus_tid(LLDB_INVALID_THREAD_ID), sent_terminated_event(false), - stop_at_entry(false), is_attach(false) { + stop_at_entry(false), is_attach(false), + waiting_for_run_in_terminal(false), reverse_request_seq(0) { const char *log_file_path = getenv("LLDBVSCODE_LOG"); #if defined(_WIN32) // Windows opens stdout and stdin in text mode which converts \n to 13,10 @@ -362,4 +363,74 @@ void VSCode::SetTarget(const lldb::SBTarget target) { } } +PacketStatus VSCode::GetObject(llvm::json::Object &object) { + std::string json = ReadJSON(); + if (json.empty()) + return PacketStatus::EndOfFile; + + llvm::StringRef json_sref(json); + llvm::Expected json_value = llvm::json::parse(json_sref); + if (!json_value) { + auto error = json_value.takeError(); + if (log) { + std::string error_str; + llvm::raw_string_ostream strm(error_str); + strm << error; + strm.flush(); + *log << "error: failed to parse JSON: " << error_str << std::endl + << json << std::endl; + } + return PacketStatus::JSONMalformed; + } + object = *json_value->getAsObject(); + if (!json_value->getAsObject()) { + if (log) + *log << "error: json packet isn't a object" << std::endl; + return PacketStatus::JSONNotObject; + } + return PacketStatus::Success; +} + +bool VSCode::HandleObject(const llvm::json::Object &object) { + const auto packet_type = GetString(object, "type"); + if (packet_type == "request") { + const auto command = GetString(object, "command"); + auto handler_pos = request_handlers.find(std::string(command)); + if 
(handler_pos != request_handlers.end()) { + handler_pos->second(object); + return true; // Success + } else { + if (log) + *log << "error: unhandled command \"" << command.data() << std::endl; + return false; // Fail + } + } + return false; +} + +PacketStatus VSCode::SendReverseRequest(llvm::json::Object request, + llvm::json::Object &response) { + request.try_emplace("seq", ++reverse_request_seq); + SendJSON(llvm::json::Value(std::move(request))); + bool got_response = false; + while (!got_response) { + PacketStatus status = GetObject(response); + const auto packet_type = GetString(response, "type"); + if (packet_type == "response") { + if (status == PacketStatus::Success) { + return status; + // Not our response, we got another packet + HandleObject(response); + } else { + return status; + } + } + } +} + +void VSCode::RegisterRequestCallback(std::string request, + RequestCallback callback) { + request_handlers[request] = callback; +} + } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h index 88a0c08de2454..874feb9a97ce4 100644 --- a/lldb/tools/lldb-vscode/VSCode.h +++ b/lldb/tools/lldb-vscode/VSCode.h @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" #include "lldb/API/SBAttachInfo.h" @@ -65,6 +66,15 @@ enum class OutputType { Console, Stdout, Stderr, Telemetry }; enum VSCodeBroadcasterBits { eBroadcastBitStopEventThread = 1u << 0 }; +typedef void (*RequestCallback)(const llvm::json::Object &command); + +enum class PacketStatus { + Success = 0, + EndOfFile, + JSONMalformed, + JSONNotObject +}; + struct VSCode { InputStream input; OutputStream output; @@ -91,6 +101,10 @@ struct VSCode { bool sent_terminated_event; bool stop_at_entry; bool is_attach; + uint32_t reverse_request_seq; + std::map request_handlers; + std::condition_variable request_in_terminal_cv; + bool 
waiting_for_run_in_terminal; // Keep track of the last stop thread index IDs as threads won't go away // unless we send a "thread" event to indicate the thread exited. llvm::DenseSet thread_ids; @@ -152,6 +166,36 @@ struct VSCode { /// Set given target object as a current target for lldb-vscode and start /// listeing for its breakpoint events. void SetTarget(const lldb::SBTarget target); + + const std::map &GetRequestHandlers(); + + PacketStatus GetObject(llvm::json::Object &object); + bool HandleObject(const llvm::json::Object &object); + + /// Send a Debug Adapter Protocol reverse request to the IDE + /// + /// \param[in] request + /// The payload of the request to send. + /// + /// \param[out] response + /// The response of the IDE. It might be undefined if there was an error. + /// + /// \return + /// A \a PacketStatus object indicating the sucess or failure of the + /// request. + PacketStatus SendReverseRequest(llvm::json::Object request, + llvm::json::Object &response); + + /// Registers a callback handler for a Debug Adapter Protocol request + /// + /// \param[in] request + /// The name of the request following the Debug Adapter Protocol + /// specification. + /// + /// \param[in] callback + /// The callback to execute when the given request is triggered by the + /// IDE. + void RegisterRequestCallback(std::string request, RequestCallback callback); }; extern VSCode g_vsc; diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 54f2e653d0697..ee01822ba6217 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -384,7 +384,12 @@ void EventThreadFunction() { break; case lldb::eStateSuspended: break; - case lldb::eStateStopped: + case lldb::eStateStopped: { + if (g_vsc.waiting_for_run_in_terminal) { + g_vsc.waiting_for_run_in_terminal = false; + g_vsc.request_in_terminal_cv.notify_one(); + } + } // Only report a stopped event if the process was not restarted. 
if (!lldb::SBProcess::GetRestartedFromEvent(event)) { SendStdOutStdErr(process); @@ -1374,6 +1379,9 @@ void request_initialize(const llvm::json::Object &request) { filters.emplace_back(CreateExceptionBreakpointFilter(exc_bp)); } body.try_emplace("exceptionBreakpointFilters", std::move(filters)); + // The debug adapter supports launching a debugee in intergrated VSCode + // terminal. + body.try_emplace("supportsRunInTerminalRequest", true); // The debug adapter supports stepping back via the stepBack and // reverseContinue requests. body.try_emplace("supportsStepBack", false); @@ -1433,6 +1441,49 @@ void request_initialize(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } +void request_runInTerminal(const llvm::json::Object &launch_request, + llvm::json::Object &launch_response) { + // We have already created a target that has a valid "program" path to the + // executable. We will attach to the next process whose name matches that + // of the target's. + g_vsc.is_attach = true; + lldb::SBAttachInfo attach_info; + lldb::SBError error; + attach_info.SetWaitForLaunch(true, /*async*/ true); + g_vsc.target.Attach(attach_info, error); + + llvm::json::Object reverse_request = + CreateRunInTerminalReverseRequest(launch_request); + llvm::json::Object reverse_response; + lldb_vscode::PacketStatus status = + g_vsc.SendReverseRequest(reverse_request, reverse_response); + if (status != lldb_vscode::PacketStatus::Success) + error.SetErrorString("Process cannot be launched by IDE."); + + if (error.Success()) { + // Wait for the attach stop event to happen or for a timeout. 
+ g_vsc.waiting_for_run_in_terminal = true; + static std::mutex mutex; + std::unique_lock locker(mutex); + g_vsc.request_in_terminal_cv.wait_for(locker, std::chrono::seconds(10)); + + auto attached_pid = g_vsc.target.GetProcess().GetProcessID(); + if (attached_pid == LLDB_INVALID_PROCESS_ID) + error.SetErrorString("Failed to attach to a process"); + else + SendProcessEvent(Attach); + } + + if (error.Fail()) { + launch_response["success"] = llvm::json::Value(false); + EmplaceSafeString(launch_response, "message", + std::string(error.GetCString())); + } else { + launch_response["success"] = llvm::json::Value(true); + g_vsc.SendJSON(CreateEventObject("initialized")); + } +} + // "LaunchRequest": { // "allOf": [ { "$ref": "#/definitions/Request" }, { // "type": "object", @@ -1505,6 +1556,12 @@ void request_launch(const llvm::json::Object &request) { return; } + if (GetBoolean(arguments, "runInTerminal", false)) { + request_runInTerminal(request, response); + g_vsc.SendJSON(llvm::json::Value(std::move(response))); + return; + } + // Instantiate a launch info instance for the target. 
auto launch_info = g_vsc.target.GetLaunchInfo(); @@ -2831,39 +2888,35 @@ void request__testGetTargetBreakpoints(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } -const std::map &GetRequestHandlers() { -#define REQUEST_CALLBACK(name) \ - { #name, request_##name } - static std::map g_request_handlers = { - // VSCode Debug Adaptor requests - REQUEST_CALLBACK(attach), - REQUEST_CALLBACK(completions), - REQUEST_CALLBACK(continue), - REQUEST_CALLBACK(configurationDone), - REQUEST_CALLBACK(disconnect), - REQUEST_CALLBACK(evaluate), - REQUEST_CALLBACK(exceptionInfo), - REQUEST_CALLBACK(getCompileUnits), - REQUEST_CALLBACK(initialize), - REQUEST_CALLBACK(launch), - REQUEST_CALLBACK(next), - REQUEST_CALLBACK(pause), - REQUEST_CALLBACK(scopes), - REQUEST_CALLBACK(setBreakpoints), - REQUEST_CALLBACK(setExceptionBreakpoints), - REQUEST_CALLBACK(setFunctionBreakpoints), - REQUEST_CALLBACK(setVariable), - REQUEST_CALLBACK(source), - REQUEST_CALLBACK(stackTrace), - REQUEST_CALLBACK(stepIn), - REQUEST_CALLBACK(stepOut), - REQUEST_CALLBACK(threads), - REQUEST_CALLBACK(variables), - // Testing requests - REQUEST_CALLBACK(_testGetTargetBreakpoints), - }; -#undef REQUEST_CALLBACK - return g_request_handlers; +void RegisterRequestCallbacks() { + g_vsc.RegisterRequestCallback("attach", request_attach); + g_vsc.RegisterRequestCallback("completions", request_completions); + g_vsc.RegisterRequestCallback("continue", request_continue); + g_vsc.RegisterRequestCallback("configurationDone", request_configurationDone); + g_vsc.RegisterRequestCallback("disconnect", request_disconnect); + g_vsc.RegisterRequestCallback("evaluate", request_evaluate); + g_vsc.RegisterRequestCallback("exceptionInfo", request_exceptionInfo); + g_vsc.RegisterRequestCallback("getCompileUnits", request_getCompileUnits); + g_vsc.RegisterRequestCallback("initialize", request_initialize); + g_vsc.RegisterRequestCallback("launch", request_launch); + 
g_vsc.RegisterRequestCallback("next", request_next); + g_vsc.RegisterRequestCallback("pause", request_pause); + g_vsc.RegisterRequestCallback("scopes", request_scopes); + g_vsc.RegisterRequestCallback("setBreakpoints", request_setBreakpoints); + g_vsc.RegisterRequestCallback("setExceptionBreakpoints", + request_setExceptionBreakpoints); + g_vsc.RegisterRequestCallback("setFunctionBreakpoints", + request_setFunctionBreakpoints); + g_vsc.RegisterRequestCallback("setVariable", request_setVariable); + g_vsc.RegisterRequestCallback("source", request_source); + g_vsc.RegisterRequestCallback("stackTrace", request_stackTrace); + g_vsc.RegisterRequestCallback("stepIn", request_stepIn); + g_vsc.RegisterRequestCallback("stepOut", request_stepOut); + g_vsc.RegisterRequestCallback("threads", request_threads); + g_vsc.RegisterRequestCallback("variables", request_variables); + // Testing requests + g_vsc.RegisterRequestCallback("_testGetTargetBreakpoints", + request__testGetTargetBreakpoints); } } // anonymous namespace @@ -2895,6 +2948,8 @@ int main(int argc, char *argv[]) { // Initialize LLDB first before we do anything. 
lldb::SBDebugger::Initialize(); + RegisterRequestCallbacks(); + int portno = -1; LLDBVSCodeOptTable T; @@ -2937,49 +2992,17 @@ int main(int argc, char *argv[]) { g_vsc.output.descriptor = StreamDescriptor::from_file(fileno(stdout), false); } - auto request_handlers = GetRequestHandlers(); uint32_t packet_idx = 0; while (!g_vsc.sent_terminated_event) { - std::string json = g_vsc.ReadJSON(); - if (json.empty()) + llvm::json::Object object; + lldb_vscode::PacketStatus status = g_vsc.GetObject(object); + if (status == lldb_vscode::PacketStatus::EndOfFile) break; + if (status != lldb_vscode::PacketStatus::Success) + return 1; // Fatal error - llvm::StringRef json_sref(json); - llvm::Expected json_value = llvm::json::parse(json_sref); - if (!json_value) { - auto error = json_value.takeError(); - if (g_vsc.log) { - std::string error_str; - llvm::raw_string_ostream strm(error_str); - strm << error; - strm.flush(); - - *g_vsc.log << "error: failed to parse JSON: " << error_str << std::endl - << json << std::endl; - } - return 1; - } - - auto object = json_value->getAsObject(); - if (!object) { - if (g_vsc.log) - *g_vsc.log << "error: json packet isn't a object" << std::endl; + if (!g_vsc.HandleObject(object)) return 1; - } - - const auto packet_type = GetString(object, "type"); - if (packet_type == "request") { - const auto command = GetString(object, "command"); - auto handler_pos = request_handlers.find(std::string(command)); - if (handler_pos != request_handlers.end()) { - handler_pos->second(*object); - } else { - if (g_vsc.log) - *g_vsc.log << "error: unhandled command \"" << command.data() - << std::endl; - return 1; - } - } ++packet_idx; } diff --git a/lldb/tools/lldb-vscode/package.json b/lldb/tools/lldb-vscode/package.json index 29ca06dd17d63..9077ab51dd7fa 100644 --- a/lldb/tools/lldb-vscode/package.json +++ b/lldb/tools/lldb-vscode/package.json @@ -175,6 +175,11 @@ "type": "array", "description": "Commands executed at the end of debugging session.", "default": [] 
+ }, + "runInTerminal": { + "type": "boolean", + "description": "Launch the program inside an integrated terminal in the IDE. Useful for debugging interactive command line programs", + "default": false } } }, From f09ccf89fbee976bcca77b374f69987c2e96e1ce Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Wed, 2 Sep 2020 09:51:30 -0700 Subject: [PATCH 050/465] [ThinLTO] Fix a metadata lost issue with DICompileUnit import. For ThinLTO importing we don't need to import all the fields of the DICompileUnit, such as enums, macros, retained types lists. The importation of those fields were previously disabled by setting their value map entries to nullptr. Unfortunately a metadata node can be shared by multiple metadata operands. Setting the map entry to nullptr might result in not importing other metadata unexpectedly. The issue is fixed by explicitly setting the original DICompileUnit fields (still a copy of the source module metadata) to null. Reviewed By: wenlei, dblaikie Differential Revision: https://reviews.llvm.org/D86675 --- llvm/lib/Linker/IRMover.cpp | 17 ++++---- .../ThinLTO/X86/Inputs/import-metadata.ll | 23 +++++++++++ llvm/test/ThinLTO/X86/import-metadata.ll | 40 +++++++++++++++++++ 3 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 llvm/test/ThinLTO/X86/Inputs/import-metadata.ll create mode 100644 llvm/test/ThinLTO/X86/import-metadata.ll diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index 055689b16e8f4..186ddb3d2b81b 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1126,14 +1126,13 @@ void IRLinker::prepareCompileUnitsForImport() { assert(CU && "Expected valid compile unit"); // Enums, macros, and retained types don't need to be listed on the // imported DICompileUnit. This means they will only be imported - // if reached from the mapped IR. 
Do this by setting their value map - // entries to nullptr, which will automatically prevent their importing - // when reached from the DICompileUnit during metadata mapping. - ValueMap.MD()[CU->getRawEnumTypes()].reset(nullptr); - ValueMap.MD()[CU->getRawMacros()].reset(nullptr); - ValueMap.MD()[CU->getRawRetainedTypes()].reset(nullptr); + // if reached from the mapped IR. + CU->replaceEnumTypes(nullptr); + CU->replaceMacros(nullptr); + CU->replaceRetainedTypes(nullptr); + // The original definition (or at least its debug info - if the variable is - // internalized an optimized away) will remain in the source module, so + // internalized and optimized away) will remain in the source module, so // there's no need to import them. // If LLVM ever does more advanced optimizations on global variables // (removing/localizing write operations, for instance) that can track @@ -1141,7 +1140,7 @@ void IRLinker::prepareCompileUnitsForImport() { // with care when it comes to debug info size. Emitting small CUs containing // only a few imported entities into every destination module may be very // size inefficient. - ValueMap.MD()[CU->getRawGlobalVariables()].reset(nullptr); + CU->replaceGlobalVariables(nullptr); // Imported entities only need to be mapped in if they have local // scope, as those might correspond to an imported entity inside a @@ -1174,7 +1173,7 @@ void IRLinker::prepareCompileUnitsForImport() { else // If there were no local scope imported entities, we can map // the whole list to nullptr. 
- ValueMap.MD()[CU->getRawImportedEntities()].reset(nullptr); + CU->replaceImportedEntities(nullptr); } } } diff --git a/llvm/test/ThinLTO/X86/Inputs/import-metadata.ll b/llvm/test/ThinLTO/X86/Inputs/import-metadata.ll new file mode 100644 index 0000000000000..d8be887928a2d --- /dev/null +++ b/llvm/test/ThinLTO/X86/Inputs/import-metadata.ll @@ -0,0 +1,23 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +define i32 @foo(i32 %goo) { +entry: + %goo.addr = alloca i32, align 4 + store i32 %goo, i32* %goo.addr, align 4 + %0 = load i32, i32* %goo.addr, align 4 + %1 = load i32, i32* %goo.addr, align 4 + %mul = mul nsw i32 %0, %1 + ret i32 %mul +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.md = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, enums: !4) +!1 = !DIFile(filename: "foo.cpp", directory: "tmp") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{} +!5 = !{!4} diff --git a/llvm/test/ThinLTO/X86/import-metadata.ll b/llvm/test/ThinLTO/X86/import-metadata.ll new file mode 100644 index 0000000000000..f938fdd5c93c9 --- /dev/null +++ b/llvm/test/ThinLTO/X86/import-metadata.ll @@ -0,0 +1,40 @@ +; RUN: opt -thinlto-bc %s -o %t1.bc +; RUN: opt -thinlto-bc %p/Inputs/import-metadata.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t1.bc %t2.bc -o %t-out \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,foo,l \ +; RUN: -r=%t2.bc,foo,pl +; RUN: llvm-dis %t-out.1.3.import.bc -o - | FileCheck %s + +;; Check the imported DICompileUnit doesn't have the enums operand. +;; Also check the imported md metadata that shares a node with the +;; enums operand originally is not null. + +; CHECK: !llvm.dbg.cu = !{![[#CU1:]], ![[#CU2:]]} +;; Note that MD1 comes from the current module. MD2 is from the imported module. +;; We are checking if the imported MD2 doesn't end up having a null operand. 
+; CHECK: !llvm.md = !{![[#MD1:]], ![[#MD2:]]} +; CHECK: ![[#MD3:]] = !{} +; CHECK: ![[#CU2]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: ![[#FILE2:]], isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) +; CHECK: ![[#MD2]] = !{![[#MD3]]} + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +declare i32 @foo(i32 %goo) + +define i32 @main() { + call i32 @foo(i32 0) + ret i32 0 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.md = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, enums: !4) +!1 = !DIFile(filename: "main.cpp", directory: "tmp") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{} +!5 = !{!4} From 553bfc8fa17f8c8a2bc414af1a403ea4abc65fde Mon Sep 17 00:00:00 2001 From: Diego Caballero Date: Thu, 3 Sep 2020 00:29:04 +0300 Subject: [PATCH 051/465] [mlir][Affine] Support affine vector loads/stores in LICM Make use of affine memory op interfaces in AffineLoopInvariantCodeMotion so that it can also work on affine.vector_load and affine.vector_store ops. Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D86986 --- .../AffineLoopInvariantCodeMotion.cpp | 13 ++--- .../affine-loop-invariant-code-motion.mlir | 47 +++++++++++++++++++ 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp index df8d875292148..133fef4f0a3f1 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp @@ -63,7 +63,7 @@ areAllOpsInTheBlockListInvariant(Region &blockList, Value indVar, static bool isMemRefDereferencingOp(Operation &op) { // TODO: Support DMA Ops. 
- return isa(op); + return isa(op); } // Returns true if the individual op is loop invariant. @@ -90,9 +90,9 @@ bool isOpLoopInvariant(Operation &op, Value indVar, definedOps.insert(&op); if (isMemRefDereferencingOp(op)) { - Value memref = isa(op) - ? cast(op).getMemRef() - : cast(op).getMemRef(); + Value memref = isa(op) + ? cast(op).getMemRef() + : cast(op).getMemRef(); for (auto *user : memref.getUsers()) { // If this memref has a user that is a DMA, give up because these // operations write to this memref. @@ -102,8 +102,9 @@ bool isOpLoopInvariant(Operation &op, Value indVar, // If the memref used by the load/store is used in a store elsewhere in // the loop nest, we do not hoist. Similarly, if the memref used in a // load is also being stored too, we do not hoist the load. - if (isa(user) || - (isa(user) && isa(op))) { + if (isa(user) || + (isa(user) && + isa(op))) { if (&op != user) { SmallVector userIVs; getLoopIVs(*user, &userIVs); diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir index 8a2ed32757e44..4256dcc0614bf 100644 --- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir +++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir @@ -566,3 +566,50 @@ func @do_not_hoist_dependent_side_effect_free_op(%arg0: memref<10x512xf32>) { // CHECK-NEXT: affine.load // CHECK-NEXT: mulf // CHECK-NEXT: } + +// ----- + +// CHECK-LABEL: func @vector_loop_nothing_invariant +func @vector_loop_nothing_invariant() { + %m1 = alloc() : memref<40xf32> + %m2 = alloc() : memref<40xf32> + affine.for %arg0 = 0 to 10 { + %v0 = affine.vector_load %m1[%arg0*4] : memref<40xf32>, vector<4xf32> + %v1 = affine.vector_load %m2[%arg0*4] : memref<40xf32>, vector<4xf32> + %v2 = addf %v0, %v1 : vector<4xf32> + affine.vector_store %v2, %m1[%arg0*4] : memref<40xf32>, vector<4xf32> + } + return +} + +// CHECK: affine.for +// CHECK-NEXT: affine.vector_load +// CHECK-NEXT: 
affine.vector_load +// CHECK-NEXT: addf +// CHECK-NEXT: affine.vector_store +// CHECK-NEXT: } + +// ----- + +// CHECK-LABEL: func @vector_loop_all_invariant +func @vector_loop_all_invariant() { + %m1 = alloc() : memref<4xf32> + %m2 = alloc() : memref<4xf32> + %m3 = alloc() : memref<4xf32> + affine.for %arg0 = 0 to 10 { + %v0 = affine.vector_load %m1[0] : memref<4xf32>, vector<4xf32> + %v1 = affine.vector_load %m2[0] : memref<4xf32>, vector<4xf32> + %v2 = addf %v0, %v1 : vector<4xf32> + affine.vector_store %v2, %m3[0] : memref<4xf32>, vector<4xf32> + } + return +} + +// CHECK: alloc() +// CHECK-NEXT: alloc() +// CHECK-NEXT: alloc() +// CHECK-NEXT: affine.vector_load +// CHECK-NEXT: affine.vector_load +// CHECK-NEXT: addf +// CHECK-NEXT: affine.vector_store +// CHECK-NEXT: affine.for From 1284dc34abd11ce4275ad21c0470ad8c679b59b7 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 2 Sep 2020 20:09:07 +0000 Subject: [PATCH 052/465] Use an Identifier instead of an OperationName internally for OpPassManager identification (NFC) This allows to defers the check for traits to the execution instead of forcing it on the pipeline creation. In particular, this is making our pipeline creation tolerant to dialects not being loaded in the context yet. 
Reviewed By: rriddle, GMNGeoffrey Differential Revision: https://reviews.llvm.org/D86915 --- mlir/include/mlir/Pass/PassInstrumentation.h | 10 +-- mlir/include/mlir/Pass/PassManager.h | 10 +-- mlir/lib/Pass/Pass.cpp | 71 +++++++++++--------- mlir/lib/Pass/PassStatistics.cpp | 2 +- mlir/lib/Pass/PassTiming.cpp | 10 +-- mlir/unittests/Pass/PassManagerTest.cpp | 43 ++++++++++++ 6 files changed, 97 insertions(+), 49 deletions(-) diff --git a/mlir/include/mlir/Pass/PassInstrumentation.h b/mlir/include/mlir/Pass/PassInstrumentation.h index dc648b2b0edfb..baf230f086fd1 100644 --- a/mlir/include/mlir/Pass/PassInstrumentation.h +++ b/mlir/include/mlir/Pass/PassInstrumentation.h @@ -9,12 +9,12 @@ #ifndef MLIR_PASS_PASSINSTRUMENTATION_H_ #define MLIR_PASS_PASSINSTRUMENTATION_H_ +#include "mlir/IR/Identifier.h" #include "mlir/Support/LLVM.h" #include "mlir/Support/TypeID.h" namespace mlir { class Operation; -class OperationName; class Pass; namespace detail { @@ -43,13 +43,13 @@ class PassInstrumentation { /// A callback to run before a pass pipeline is executed. This function takes /// the name of the operation type being operated on, and information related /// to the parent that spawned this pipeline. - virtual void runBeforePipeline(const OperationName &name, + virtual void runBeforePipeline(Identifier name, const PipelineParentInfo &parentInfo) {} /// A callback to run after a pass pipeline has executed. This function takes /// the name of the operation type being operated on, and information related /// to the parent that spawned this pipeline. - virtual void runAfterPipeline(const OperationName &name, + virtual void runAfterPipeline(Identifier name, const PipelineParentInfo &parentInfo) {} /// A callback to run before a pass is executed. This function takes a pointer @@ -90,12 +90,12 @@ class PassInstrumentor { /// See PassInstrumentation::runBeforePipeline for details. 
void - runBeforePipeline(const OperationName &name, + runBeforePipeline(Identifier name, const PassInstrumentation::PipelineParentInfo &parentInfo); /// See PassInstrumentation::runAfterPipeline for details. void - runAfterPipeline(const OperationName &name, + runAfterPipeline(Identifier name, const PassInstrumentation::PipelineParentInfo &parentInfo); /// See PassInstrumentation::runBeforePass for details. diff --git a/mlir/include/mlir/Pass/PassManager.h b/mlir/include/mlir/Pass/PassManager.h index e19a1fab7f130..8addd9809f90a 100644 --- a/mlir/include/mlir/Pass/PassManager.h +++ b/mlir/include/mlir/Pass/PassManager.h @@ -26,9 +26,9 @@ class Any; namespace mlir { class AnalysisManager; +class Identifier; class MLIRContext; class ModuleOp; -class OperationName; class Operation; class Pass; class PassInstrumentation; @@ -47,7 +47,7 @@ struct OpPassManagerImpl; /// other OpPassManagers or the top-level PassManager. class OpPassManager { public: - OpPassManager(OperationName name, bool verifyPasses); + OpPassManager(Identifier name, MLIRContext *context, bool verifyPasses); OpPassManager(OpPassManager &&rhs); OpPassManager(const OpPassManager &rhs); ~OpPassManager(); @@ -70,10 +70,10 @@ class OpPassManager { /// Nest a new operation pass manager for the given operation kind under this /// pass manager. - OpPassManager &nest(const OperationName &nestedName); + OpPassManager &nest(Identifier nestedName); OpPassManager &nest(StringRef nestedName); template OpPassManager &nest() { - return nest(OpT::getOperationName()); + return nest(Identifier::get(OpT::getOperationName(), getContext())); } /// Add the given pass to this pass manager. If this pass has a concrete @@ -93,7 +93,7 @@ class OpPassManager { MLIRContext *getContext() const; /// Return the operation name that this pass manager operates on. - const OperationName &getOpName() const; + Identifier getOpName() const; /// Returns the internal implementation instance. 
detail::OpPassManagerImpl &getImpl(); diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index bb521633b5f3f..d3cf62574afda 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -92,17 +92,17 @@ void VerifierPass::runOnOperation() { namespace mlir { namespace detail { struct OpPassManagerImpl { - OpPassManagerImpl(OperationName name, bool verifyPasses) - : name(name), verifyPasses(verifyPasses) {} + OpPassManagerImpl(Identifier name, MLIRContext *ctx, bool verifyPasses) + : name(name), context(ctx), verifyPasses(verifyPasses) {} /// Merge the passes of this pass manager into the one provided. void mergeInto(OpPassManagerImpl &rhs); /// Nest a new operation pass manager for the given operation kind under this /// pass manager. - OpPassManager &nest(const OperationName &nestedName); + OpPassManager &nest(Identifier nestedName); OpPassManager &nest(StringRef nestedName) { - return nest(OperationName(nestedName, getContext())); + return nest(Identifier::get(nestedName, getContext())); } /// Add the given pass to this pass manager. If this pass has a concrete @@ -118,12 +118,13 @@ struct OpPassManagerImpl { void splitAdaptorPasses(); /// Return an instance of the context. - MLIRContext *getContext() const { - return name.getAbstractOperation()->dialect.getContext(); - } + MLIRContext *getContext() const { return context; } /// The name of the operation that passes of this pass manager operate on. - OperationName name; + Identifier name; + + /// The current context for this pass manager + MLIRContext *context; /// Flag that specifies if the IR should be verified after each pass has run. 
bool verifyPasses : 1; @@ -141,8 +142,8 @@ void OpPassManagerImpl::mergeInto(OpPassManagerImpl &rhs) { passes.clear(); } -OpPassManager &OpPassManagerImpl::nest(const OperationName &nestedName) { - OpPassManager nested(nestedName, verifyPasses); +OpPassManager &OpPassManagerImpl::nest(Identifier nestedName) { + OpPassManager nested(nestedName, getContext(), verifyPasses); auto *adaptor = new OpToOpPassAdaptor(std::move(nested)); addPass(std::unique_ptr(adaptor)); return adaptor->getPassManagers().front(); @@ -152,7 +153,7 @@ void OpPassManagerImpl::addPass(std::unique_ptr pass) { // If this pass runs on a different operation than this pass manager, then // implicitly nest a pass manager for this operation. auto passOpName = pass->getOpName(); - if (passOpName && passOpName != name.getStringRef()) + if (passOpName && passOpName != name.strref()) return nest(*passOpName).addPass(std::move(pass)); passes.emplace_back(std::move(pass)); @@ -239,19 +240,14 @@ void OpPassManagerImpl::splitAdaptorPasses() { // OpPassManager //===----------------------------------------------------------------------===// -OpPassManager::OpPassManager(OperationName name, bool verifyPasses) - : impl(new OpPassManagerImpl(name, verifyPasses)) { - assert(name.getAbstractOperation() && - "OpPassManager can only operate on registered operations"); - assert(name.getAbstractOperation()->hasProperty( - OperationProperty::IsolatedFromAbove) && - "OpPassManager only supports operating on operations marked as " - "'IsolatedFromAbove'"); -} +OpPassManager::OpPassManager(Identifier name, MLIRContext *context, + bool verifyPasses) + : impl(new OpPassManagerImpl(name, context, verifyPasses)) {} OpPassManager::OpPassManager(OpPassManager &&rhs) : impl(std::move(rhs.impl)) {} OpPassManager::OpPassManager(const OpPassManager &rhs) { *this = rhs; } OpPassManager &OpPassManager::operator=(const OpPassManager &rhs) { - impl.reset(new OpPassManagerImpl(rhs.impl->name, rhs.impl->verifyPasses)); + impl.reset(new 
OpPassManagerImpl(rhs.impl->name, rhs.impl->getContext(), + rhs.impl->verifyPasses)); for (auto &pass : rhs.impl->passes) impl->passes.emplace_back(pass->clone()); return *this; @@ -275,7 +271,7 @@ OpPassManager::const_pass_iterator OpPassManager::end() const { /// Nest a new operation pass manager for the given operation kind under this /// pass manager. -OpPassManager &OpPassManager::nest(const OperationName &nestedName) { +OpPassManager &OpPassManager::nest(Identifier nestedName) { return impl->nest(nestedName); } OpPassManager &OpPassManager::nest(StringRef nestedName) { @@ -298,7 +294,7 @@ OpPassManagerImpl &OpPassManager::getImpl() { return *impl; } MLIRContext *OpPassManager::getContext() const { return impl->getContext(); } /// Return the operation name that this pass manager operates on. -const OperationName &OpPassManager::getOpName() const { return impl->name; } +Identifier OpPassManager::getOpName() const { return impl->name; } /// Prints out the given passes as the textual representation of a pipeline. static void printAsTextualPipeline(ArrayRef> passes, @@ -336,6 +332,14 @@ void OpPassManager::getDependentDialects(DialectRegistry &dialects) const { LogicalResult OpToOpPassAdaptor::run(Pass *pass, Operation *op, AnalysisManager am) { + if (!op->getName().getAbstractOperation()) + return op->emitOpError() + << "trying to schedule a pass on an unregistered operation"; + if (!op->getName().getAbstractOperation()->hasProperty( + OperationProperty::IsolatedFromAbove)) + return op->emitOpError() << "trying to schedule a pass on an operation not " + "marked as 'IsolatedFromAbove'"; + pass->passState.emplace(op, am); // Instrument before the pass has run. @@ -385,7 +389,7 @@ LogicalResult OpToOpPassAdaptor::runPipeline( /// Find an operation pass manager that can operate on an operation of the given /// type, or nullptr if one does not exist. 
static OpPassManager *findPassManagerFor(MutableArrayRef mgrs, - const OperationName &name) { + Identifier name) { auto it = llvm::find_if( mgrs, [&](OpPassManager &mgr) { return mgr.getOpName() == name; }); return it == mgrs.end() ? nullptr : &*it; @@ -417,8 +421,8 @@ void OpToOpPassAdaptor::mergeInto(OpToOpPassAdaptor &rhs) { // After coalescing, sort the pass managers within rhs by name. llvm::array_pod_sort(rhs.mgrs.begin(), rhs.mgrs.end(), [](const OpPassManager *lhs, const OpPassManager *rhs) { - return lhs->getOpName().getStringRef().compare( - rhs->getOpName().getStringRef()); + return lhs->getOpName().strref().compare( + rhs->getOpName().strref()); }); } @@ -450,7 +454,7 @@ void OpToOpPassAdaptor::runOnOperationImpl() { for (auto ®ion : getOperation()->getRegions()) { for (auto &block : region) { for (auto &op : block) { - auto *mgr = findPassManagerFor(mgrs, op.getName()); + auto *mgr = findPassManagerFor(mgrs, op.getName().getIdentifier()); if (!mgr) continue; @@ -494,8 +498,8 @@ void OpToOpPassAdaptor::runOnOperationAsyncImpl() { for (auto ®ion : getOperation()->getRegions()) { for (auto &block : region) { for (auto &op : block) { - // Add this operation iff the name matches the any of the pass managers. - if (findPassManagerFor(mgrs, op.getName())) + // Add this operation iff the name matches any of the pass managers. + if (findPassManagerFor(mgrs, op.getName().getIdentifier())) opAMPairs.emplace_back(&op, am.nest(&op)); } } @@ -531,7 +535,8 @@ void OpToOpPassAdaptor::runOnOperationAsyncImpl() { // Get the pass manager for this operation and execute it. 
auto &it = opAMPairs[nextID]; - auto *pm = findPassManagerFor(pms, it.first->getName()); + auto *pm = + findPassManagerFor(pms, it.first->getName().getIdentifier()); assert(pm && "expected valid pass manager for operation"); if (instrumentor) @@ -732,7 +737,7 @@ PassManager::runWithCrashRecovery(MutableArrayRef> passes, //===----------------------------------------------------------------------===// PassManager::PassManager(MLIRContext *ctx, bool verifyPasses) - : OpPassManager(OperationName(ModuleOp::getOperationName(), ctx), + : OpPassManager(Identifier::get(ModuleOp::getOperationName(), ctx), ctx, verifyPasses), passTiming(false), localReproducer(false) {} @@ -870,7 +875,7 @@ PassInstrumentor::~PassInstrumentor() {} /// See PassInstrumentation::runBeforePipeline for details. void PassInstrumentor::runBeforePipeline( - const OperationName &name, + Identifier name, const PassInstrumentation::PipelineParentInfo &parentInfo) { llvm::sys::SmartScopedLock instrumentationLock(impl->mutex); for (auto &instr : impl->instrumentations) @@ -879,7 +884,7 @@ void PassInstrumentor::runBeforePipeline( /// See PassInstrumentation::runAfterPipeline for details. void PassInstrumentor::runAfterPipeline( - const OperationName &name, + Identifier name, const PassInstrumentation::PipelineParentInfo &parentInfo) { llvm::sys::SmartScopedLock instrumentationLock(impl->mutex); for (auto &instr : llvm::reverse(impl->instrumentations)) diff --git a/mlir/lib/Pass/PassStatistics.cpp b/mlir/lib/Pass/PassStatistics.cpp index 6ef0d3bbea6a8..3721230b6913d 100644 --- a/mlir/lib/Pass/PassStatistics.cpp +++ b/mlir/lib/Pass/PassStatistics.cpp @@ -116,7 +116,7 @@ static void printResultsAsPipeline(raw_ostream &os, OpPassManager &pm) { // Print each of the children passes. 
for (OpPassManager &mgr : mgrs) { - auto name = ("'" + mgr.getOpName().getStringRef() + "' Pipeline").str(); + auto name = ("'" + mgr.getOpName().strref() + "' Pipeline").str(); printPassEntry(os, indent, name); for (Pass &pass : mgr.getPasses()) printPass(indent + 2, &pass); diff --git a/mlir/lib/Pass/PassTiming.cpp b/mlir/lib/Pass/PassTiming.cpp index 71bf822a864bc..e3978751c11ca 100644 --- a/mlir/lib/Pass/PassTiming.cpp +++ b/mlir/lib/Pass/PassTiming.cpp @@ -165,9 +165,9 @@ struct PassTiming : public PassInstrumentation { ~PassTiming() override { print(); } /// Setup the instrumentation hooks. - void runBeforePipeline(const OperationName &name, + void runBeforePipeline(Identifier name, const PipelineParentInfo &parentInfo) override; - void runAfterPipeline(const OperationName &name, + void runAfterPipeline(Identifier name, const PipelineParentInfo &parentInfo) override; void runBeforePass(Pass *pass, Operation *) override { startPassTimer(pass); } void runAfterPass(Pass *pass, Operation *) override; @@ -242,15 +242,15 @@ struct PassTiming : public PassInstrumentation { }; } // end anonymous namespace -void PassTiming::runBeforePipeline(const OperationName &name, +void PassTiming::runBeforePipeline(Identifier name, const PipelineParentInfo &parentInfo) { // We don't actually want to time the pipelines, they gather their total // from their held passes. getTimer(name.getAsOpaquePointer(), TimerKind::Pipeline, - [&] { return ("'" + name.getStringRef() + "' Pipeline").str(); }); + [&] { return ("'" + name.strref() + "' Pipeline").str(); }); } -void PassTiming::runAfterPipeline(const OperationName &name, +void PassTiming::runAfterPipeline(Identifier name, const PipelineParentInfo &parentInfo) { // Pop the timer for the pipeline. 
auto tid = llvm::get_threadid(); diff --git a/mlir/unittests/Pass/PassManagerTest.cpp b/mlir/unittests/Pass/PassManagerTest.cpp index 29086a2994e8b..99d4972ef63c0 100644 --- a/mlir/unittests/Pass/PassManagerTest.cpp +++ b/mlir/unittests/Pass/PassManagerTest.cpp @@ -74,4 +74,47 @@ TEST(PassManagerTest, OpSpecificAnalysis) { } } +namespace { +struct InvalidPass : Pass { + InvalidPass() : Pass(TypeID::get(), StringRef("invalid_op")) {} + StringRef getName() const override { return "Invalid Pass"; } + void runOnOperation() override {} + + /// A clone method to create a copy of this pass. + std::unique_ptr clonePass() const override { + return std::make_unique( + *static_cast(this)); + } +}; +} // anonymous namespace + +TEST(PassManagerTest, InvalidPass) { + MLIRContext context; + + // Create a module + OwningModuleRef module(ModuleOp::create(UnknownLoc::get(&context))); + + // Add a single "invalid_op" operation + OpBuilder builder(&module->getBodyRegion()); + OperationState state(UnknownLoc::get(&context), "invalid_op"); + builder.insert(Operation::create(state)); + + // Register a diagnostic handler to capture the diagnostic so that we can + // check it later. + std::unique_ptr diagnostic; + context.getDiagEngine().registerHandler([&](Diagnostic &diag) { + diagnostic.reset(new Diagnostic(std::move(diag))); + }); + + // Instantiate and run our pass. 
+ PassManager pm(&context); + pm.addPass(std::make_unique()); + LogicalResult result = pm.run(module.get()); + EXPECT_TRUE(failed(result)); + ASSERT_TRUE(diagnostic.get() != nullptr); + EXPECT_EQ( + diagnostic->str(), + "'invalid_op' op trying to schedule a pass on an unregistered operation"); +} + } // end namespace From 848b0e244c9ff5413c2eee6357d5faab1402d619 Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Wed, 2 Sep 2020 15:00:26 -0700 Subject: [PATCH 053/465] Improve error handling for SmallVector programming errors This patch changes errors in `SmallVector::grow` that are independent of memory capacity to be reported using report_fatal_error or std::length_error instead of report_bad_alloc_error, which falsely signals an OOM. It also cleans up a few related things: - makes report_bad_alloc_error print the failure reason passed to it. - fixes the documentation to indicate that report_bad_alloc_error calls `abort()` not "an assertion" - uses a consistent name for the size/capacity argument to `grow` and `grow_pod` Reviewed By: mehdi_amini, MaskRay Differential Revision: https://reviews.llvm.org/D86892 --- llvm/include/llvm/ADT/SmallVector.h | 34 +++++++++++++++++------ llvm/include/llvm/Support/ErrorHandling.h | 6 ++-- llvm/lib/Support/ErrorHandling.cpp | 8 ++++-- llvm/lib/Support/SmallVector.cpp | 30 +++++++++++++++----- 4 files changed, 56 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 3ccee3d21d48b..5d8658f612718 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -60,7 +60,7 @@ template class SmallVectorBase { /// This is an implementation of the grow() method which only works /// on POD-like data types and is out of line to reduce code duplication. /// This function will report a fatal error if it cannot increase capacity.
- void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize); + void grow_pod(void *FirstEl, size_t MinSize, size_t TSize); public: size_t size() const { return Size; } @@ -115,8 +115,8 @@ class SmallVectorTemplateCommon protected: SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {} - void grow_pod(size_t MinCapacity, size_t TSize) { - Base::grow_pod(getFirstEl(), MinCapacity, TSize); + void grow_pod(size_t MinSize, size_t TSize) { + Base::grow_pod(getFirstEl(), MinSize, TSize); } /// Return true if this is a smallvector which has not had dynamic @@ -268,16 +268,32 @@ template void SmallVectorTemplateBase::grow(size_t MinSize) { // Ensure we can fit the new capacity. // This is only going to be applicable when the capacity is 32 bit. - if (MinSize > this->SizeTypeMax()) - report_bad_alloc_error("SmallVector capacity overflow during allocation"); + if (MinSize > this->SizeTypeMax()) { + std::string Reason = "SmallVector unable to grow. Requested capacity (" + + std::to_string(MinSize) + + ") is larger than maximum value for size type (" + + std::to_string(this->SizeTypeMax()) + ")"; +#ifdef LLVM_ENABLE_EXCEPTIONS + throw std::length_error(Reason); +#else + report_fatal_error(Reason); +#endif + } // Ensure we can meet the guarantee of space for at least one more element. // The above check alone will not catch the case where grow is called with a - // default MinCapacity of 0, but the current capacity cannot be increased. + // default MinSize of 0, but the current capacity cannot be increased. // This is only going to be applicable when the capacity is 32 bit. - if (this->capacity() == this->SizeTypeMax()) - report_bad_alloc_error("SmallVector capacity unable to grow"); - + if (this->capacity() == this->SizeTypeMax()) { + std::string Reason = + "SmallVector capacity unable to grow. 
Already at maximum size " + + std::to_string(this->SizeTypeMax()); +#ifdef LLVM_ENABLE_EXCEPTIONS + throw std::length_error(Reason); +#else + report_fatal_error(Reason); +#endif + } // Always grow, even from zero. size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2)); NewCapacity = std::min(std::max(NewCapacity, MinSize), this->SizeTypeMax()); diff --git a/llvm/include/llvm/Support/ErrorHandling.h b/llvm/include/llvm/Support/ErrorHandling.h index 7cbc668b3a0e8..0ec0242d569d3 100644 --- a/llvm/include/llvm/Support/ErrorHandling.h +++ b/llvm/include/llvm/Support/ErrorHandling.h @@ -110,9 +110,9 @@ void install_out_of_memory_new_handler(); /// the following unwind succeeds, e.g. do not trigger additional allocations /// in the unwind chain. /// -/// If no error handler is installed (default), then a bad_alloc exception -/// is thrown, if LLVM is compiled with exception support, otherwise an -/// assertion is called. +/// If no error handler is installed (default), throws a bad_alloc exception +/// if LLVM is compiled with exception support. Otherwise prints the error +/// to standard error and calls abort(). LLVM_ATTRIBUTE_NORETURN void report_bad_alloc_error(const char *Reason, bool GenCrashDiag = true); diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp index e962657730fe5..23b9f962422e8 100644 --- a/llvm/lib/Support/ErrorHandling.cpp +++ b/llvm/lib/Support/ErrorHandling.cpp @@ -168,9 +168,11 @@ void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) { #else // Don't call the normal error handler. It may allocate memory. Directly write // an OOM to stderr and abort. 
- char OOMMessage[] = "LLVM ERROR: out of memory\n"; - ssize_t written = ::write(2, OOMMessage, strlen(OOMMessage)); - (void)written; + const char *OOMMessage = "LLVM ERROR: out of memory\n"; + const char *Newline = "\n"; + (void)::write(2, OOMMessage, strlen(OOMMessage)); + (void)::write(2, Reason, strlen(Reason)); + (void)::write(2, Newline, strlen(Newline)); abort(); #endif } diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp index 6d5fe7165f633..73137640536c3 100644 --- a/llvm/lib/Support/SmallVector.cpp +++ b/llvm/lib/Support/SmallVector.cpp @@ -44,24 +44,40 @@ static_assert(sizeof(SmallVector) == // Note: Moving this function into the header may cause performance regression. template -void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity, +void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSize, size_t TSize) { // Ensure we can fit the new capacity. // This is only going to be applicable when the capacity is 32 bit. - if (MinCapacity > SizeTypeMax()) - report_bad_alloc_error("SmallVector capacity overflow during allocation"); + if (MinSize > SizeTypeMax()) { + std::string Reason = "SmallVector unable to grow. Requested capacity (" + + std::to_string(MinSize) + + ") is larger than maximum value for size type (" + + std::to_string(SizeTypeMax()) + ")"; +#ifdef LLVM_ENABLE_EXCEPTIONS + throw std::length_error(Reason); +#else + report_fatal_error(Reason); +#endif + } // Ensure we can meet the guarantee of space for at least one more element. // The above check alone will not catch the case where grow is called with a - // default MinCapacity of 0, but the current capacity cannot be increased. + // default MinSize of 0, but the current capacity cannot be increased. // This is only going to be applicable when the capacity is 32 bit. 
- if (capacity() == SizeTypeMax()) - report_bad_alloc_error("SmallVector capacity unable to grow"); + if (capacity() == SizeTypeMax()) { + std::string Reason = + "SmallVector capacity unable to grow. Already at maximum size " + + std::to_string(SizeTypeMax()); +#ifdef LLVM_ENABLE_EXCEPTIONS + throw std::length_error(Reason); +#endif + report_fatal_error(Reason); + } // In theory 2*capacity can overflow if the capacity is 64 bit, but the // original capacity would never be large enough for this to be a problem. size_t NewCapacity = 2 * capacity() + 1; // Always grow. - NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax()); + NewCapacity = std::min(std::max(NewCapacity, MinSize), SizeTypeMax()); void *NewElts; if (BeginX == FirstEl) { From b4f04d7135078dd22abbcedbabd3bd67ccb037d7 Mon Sep 17 00:00:00 2001 From: Huihui Zhang Date: Wed, 2 Sep 2020 15:02:04 -0700 Subject: [PATCH 054/465] [VectorCombine][SVE] Do not fold bitcast shuffle for scalable type. First, shuffle cost is not known for scalable type; Second, we cannot reason if the narrowed shuffle mask for scalable type is a splat or not. E.g., Bitcast splat vector from type to will involve narrowing shuffle mask zeroinitializer to with element sequence of <0, 1, 0, 1, ...>, which cannot be reasoned if it's a valid splat or not.
Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D86995 --- .../Transforms/Vectorize/VectorCombine.cpp | 17 ++++++++------- .../VectorCombine/AArch64/lit.local.cfg | 2 ++ .../AArch64/vscale-bitcast-shuffle.ll | 21 +++++++++++++++++++ 3 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 llvm/test/Transforms/VectorCombine/AArch64/lit.local.cfg create mode 100644 llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle.ll diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index a954b9b293154..29e9b92040d43 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -434,11 +434,14 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) { m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask)))))) return false; - // Disallow non-vector casts and length-changing shuffles. + // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for + // scalable type is unknown; Second, we cannot reason if the narrowed shuffle + // mask for scalable type is a splat or not. + // 2) Disallow non-vector casts and length-changing shuffles. // TODO: We could allow any shuffle. - auto *DestTy = dyn_cast(I.getType()); - auto *SrcTy = cast(V->getType()); - if (!DestTy || I.getOperand(0)->getType() != SrcTy) + auto *DestTy = dyn_cast(I.getType()); + auto *SrcTy = dyn_cast(V->getType()); + if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy) return false; // The new shuffle must not cost more than the old shuffle. The bitcast is @@ -447,10 +450,8 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) { TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy)) return false; - // FIXME: it should be possible to implement the computation of the widened - // shuffle mask in terms of ElementCount to work with scalable shuffles. 
- unsigned DestNumElts = cast(DestTy)->getNumElements(); - unsigned SrcNumElts = cast(SrcTy)->getNumElements(); + unsigned DestNumElts = DestTy->getNumElements(); + unsigned SrcNumElts = SrcTy->getNumElements(); SmallVector NewMask; if (SrcNumElts <= DestNumElts) { // The bitcast is from wide to narrow/equal elements. The shuffle mask can diff --git a/llvm/test/Transforms/VectorCombine/AArch64/lit.local.cfg b/llvm/test/Transforms/VectorCombine/AArch64/lit.local.cfg new file mode 100644 index 0000000000000..7184443994b69 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle.ll new file mode 100644 index 0000000000000..e474e1d2d75e9 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -vector-combine -S -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; This test checks we are not crashing with TTI when trying to get shuffle cost. +; This test also checks that shuffle mask zeroinitializer is +; not narrowed into <0, 1, 0, 1, ...>, which we cannot reason if it's a valid +; splat or not.
+ +define @bitcast_shuffle( %a) { +; CHECK-LABEL: @bitcast_shuffle( +; CHECK-NEXT: [[I:%.*]] = shufflevector [[A:%.*]], undef, zeroinitializer +; CHECK-NEXT: [[R:%.*]] = bitcast [[I]] to +; CHECK-NEXT: ret [[R]] +; + %i = shufflevector %a, undef, zeroinitializer + %r = bitcast %i to + ret %r +} From 51128b670d4f757132e927c4f3dd78d257d37f70 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Wed, 2 Sep 2020 14:46:20 -0700 Subject: [PATCH 055/465] Fix de6caf871be79dc7549aebe4e4fb57d52f6ed202 Failure found in http://lab.llvm.org:8011/builders/lldb-x86_64-debian/builds/16855 The issue is a header not being included --- lldb/tools/lldb-vscode/VSCode.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h index 874feb9a97ce4..4a20c56c53eb0 100644 --- a/lldb/tools/lldb-vscode/VSCode.h +++ b/lldb/tools/lldb-vscode/VSCode.h @@ -9,6 +9,7 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H #define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H +#include #include #include #include From 69289cc10ffd1de4d3bf05d33948e6b21b6e68db Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Wed, 2 Sep 2020 17:04:35 -0500 Subject: [PATCH 056/465] [PowerPC] Fix broken kill flag after MI peephole The test case in https://bugs.llvm.org/show_bug.cgi?id=47373 exposed two bugs in the PPC back end. The first one was fixed in commit 27714075848e7f05a297317ad28ad2570d8e5a43 but the test case had to be added without -verify-machineinstrs due to the second bug. This commit fixes the use-after-kill that is left behind by the PPC MI peephole optimization. 
--- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 2 ++ .../PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir | 2 +- llvm/test/CodeGen/PowerPC/mi-peephole.mir | 2 +- llvm/test/CodeGen/PowerPC/pr47373.ll | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index d2aba6bd6e8de..227c863685ae9 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -1555,6 +1555,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) { MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); MI.getOperand(2).setImm(NewSH); MI.getOperand(3).setImm(NewMB); + MI.getOperand(1).setIsKill(SrcMI->getOperand(1).isKill()); + SrcMI->getOperand(1).setIsKill(false); LLVM_DEBUG(dbgs() << "To: "); LLVM_DEBUG(MI.dump()); diff --git a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir index 7c14e7750df90..2f7a85a111ebb 100644 --- a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir +++ b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate-remove-SrcMI.mir @@ -51,4 +51,4 @@ body: | # # CHECK-PASS-NOT: %2:g8rc = RLDICL killed %1, 0, 32 # CHECK-PASS-NOT: %3:g8rc = RLDICR %2, 2, 61 -# CHECK-PASS: %3:g8rc = RLDIC %1, 2, 30 +# CHECK-PASS: %3:g8rc = RLDIC killed %1, 2, 30 diff --git a/llvm/test/CodeGen/PowerPC/mi-peephole.mir b/llvm/test/CodeGen/PowerPC/mi-peephole.mir index 8bf72461d5453..c7f41cd0bc4c9 100644 --- a/llvm/test/CodeGen/PowerPC/mi-peephole.mir +++ b/llvm/test/CodeGen/PowerPC/mi-peephole.mir @@ -31,7 +31,7 @@ body: | ; CHECK: bb.0.entry: ; CHECK: %1:g8rc = COPY $x4 ; CHECK: %0:g8rc = COPY $x3 - ; CHECK: %3:g8rc = RLDIC %1, 2, 30 + ; CHECK: %3:g8rc = RLDIC killed %1, 2, 30 ; CHECK: $x3 = COPY %3 ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $x3 ... 
diff --git a/llvm/test/CodeGen/PowerPC/pr47373.ll b/llvm/test/CodeGen/PowerPC/pr47373.ll index 559f4f9a8b4ae..d09a5fe8fb0b6 100644 --- a/llvm/test/CodeGen/PowerPC/pr47373.ll +++ b/llvm/test/CodeGen/PowerPC/pr47373.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 \ +; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 -verify-machineinstrs \ ; RUN: -mcpu=ppc64 -ppc-asm-full-reg-names < %s | FileCheck %s @a = local_unnamed_addr global float* null, align 8 From 55714678796c8fc456866add2841f3dd282e5392 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 2 Sep 2020 18:11:26 -0400 Subject: [PATCH 057/465] [libc++] Avoid including on non-Apple platforms in --- libcxx/include/ctime | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libcxx/include/ctime b/libcxx/include/ctime index 3aa619daa3581..b0e6c65af5d5f 100644 --- a/libcxx/include/ctime +++ b/libcxx/include/ctime @@ -59,9 +59,11 @@ int timespec_get( struct timespec *ts, int base); // C++17 // we're detecting this here instead of in <__config> because we can't include // system headers from <__config>, since it leads to circular module dependencies. // This is also meant to be a very temporary workaround until the SDKs are fixed. 
-#include -#if defined(__APPLE__) && defined(_LIBCPP_HAS_TIMESPEC_GET) && (__DARWIN_C_LEVEL < __DARWIN_C_FULL) -# define _LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED +#if defined(__APPLE__) +# include +# if defined(_LIBCPP_HAS_TIMESPEC_GET) && (__DARWIN_C_LEVEL < __DARWIN_C_FULL) +# define _LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED +# endif #endif _LIBCPP_BEGIN_NAMESPACE_STD From 46781630a34ea2157bf59f9de5dc545dcd61a146 Mon Sep 17 00:00:00 2001 From: Diego Caballero Date: Thu, 3 Sep 2020 00:56:43 +0300 Subject: [PATCH 058/465] [MLIR][Affine][VectorOps] Vectorize uniform values in SuperVectorizer This patch adds basic support for vectorization of uniform values to SuperVectorizer. For now, only invariant values to the target vector loops are considered uniform. This enables the vectorization of loops that use function arguments and external definitions to the vector loops. We could extend uniform support in the future if we implement some kind of divergence analysis algorithm. Reviewed By: nicolasvasilache, aartbik Differential Revision: https://reviews.llvm.org/D86756 --- mlir/include/mlir/IR/Builders.h | 14 +++++ .../Affine/Transforms/SuperVectorize.cpp | 55 ++++++++++++++--- .../SuperVectorize/uniform_divergent.mlir | 60 +++++++++++++++++++ .../Affine/SuperVectorize/vectorize_1d.mlir | 19 ------ 4 files changed, 122 insertions(+), 26 deletions(-) create mode 100644 mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir diff --git a/mlir/include/mlir/IR/Builders.h b/mlir/include/mlir/IR/Builders.h index aa1cc0a1a2b47..0c30869752ea3 100644 --- a/mlir/include/mlir/IR/Builders.h +++ b/mlir/include/mlir/IR/Builders.h @@ -328,6 +328,20 @@ class OpBuilder : public Builder { setInsertionPoint(op->getBlock(), ++Block::iterator(op)); } + /// Sets the insertion point to the node after the specified value. If value + /// has a defining operation, sets the insertion point to the node after such + /// defining operation. 
This will cause subsequent insertions to go right + /// after it. Otherwise, value is a BlockArgument. Sets the insertion point to + /// the start of its block. + void setInsertionPointAfter(Value val) { + if (Operation *op = val.getDefiningOp()) { + setInsertionPointAfter(op); + } else { + auto blockArg = val.cast(); + setInsertionPointToStart(blockArg.getOwner()); + } + } + /// Sets the insertion point to the start of the specified block. void setInsertionPointToStart(Block *block) { setInsertionPoint(block, block->begin()); diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp index 748530f033585..1de7b8957711a 100644 --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/Debug.h" using namespace mlir; +using namespace vector; /// /// Implements a high-level vectorization strategy on a Function. @@ -918,6 +919,42 @@ static Value vectorizeConstant(Operation *op, ConstantOp constant, Type type) { return b.createOperation(state)->getResult(0); } +/// Returns the vector type resulting from applying the provided vectorization +/// strategy on the scalar type. +static VectorType getVectorType(Type scalarTy, + const VectorizationStrategy *strategy) { + assert(!scalarTy.isa() && "Expected scalar type"); + return VectorType::get(strategy->vectorSizes, scalarTy); +} + +/// Returns true if the provided value is vector uniform given the vectorization +/// strategy. +// TODO: For now, only values that are invariant to all the loops in the +// vectorization strategy are considered vector uniforms.
+static bool isUniformDefinition(Value value, + const VectorizationStrategy *strategy) { + for (auto loopToDim : strategy->loopToVectorDim) { + auto loop = cast(loopToDim.first); + if (!loop.isDefinedOutsideOfLoop(value)) + return false; + } + return true; +} + +/// Generates a broadcast op for the provided uniform value using the +/// vectorization strategy in 'state'. +static Value vectorizeUniform(Value value, VectorizationState *state) { + OpBuilder builder(value.getContext()); + builder.setInsertionPointAfter(value); + + auto vectorTy = getVectorType(value.getType(), state->strategy); + auto bcast = builder.create(value.getLoc(), vectorTy, value); + + // Add broadcast to the replacement map to reuse it for other uses. + state->replacementMap[value] = bcast; + return bcast; +} + /// Tries to vectorize a given operand `op` of Operation `op` during /// def-chain propagation or during terminal vectorization, by applying the /// following logic: @@ -927,7 +964,8 @@ static Value vectorizeConstant(Operation *op, ConstantOp constant, Type type) { /// vectorize atm (i.e. broadcasting required), returns nullptr to indicate /// failure; /// 3. if the `op` is a constant, returns the vectorized form of the constant; -/// 4. non-constant scalars are currently non-vectorizable, in particular to +/// 4. if the `op` is uniform, returns a vector broadcast of the `op`; +/// 5. non-constant scalars are currently non-vectorizable, in particular to /// guard against vectorizing an index which may be loop-variant and needs /// special handling. /// @@ -963,12 +1001,15 @@ static Value vectorizeOperand(Value operand, Operation *op, return nullptr; } // 3. vectorize constant. - if (auto constant = operand.getDefiningOp()) { - return vectorizeConstant( - op, constant, - VectorType::get(state->strategy->vectorSizes, operand.getType())); - } - // 4. currently non-vectorizable. 
+ if (auto constant = operand.getDefiningOp()) + return vectorizeConstant(op, constant, + getVectorType(operand.getType(), state->strategy)); + + // 4. Uniform values. + if (isUniformDefinition(operand, state->strategy)) + return vectorizeUniform(operand, state); + + // 5. currently non-vectorizable. LLVM_DEBUG(dbgs() << "-> non-vectorizable: " << operand); return nullptr; } diff --git a/mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir b/mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir new file mode 100644 index 0000000000000..9d9f06177e3b3 --- /dev/null +++ b/mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir @@ -0,0 +1,60 @@ +// RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=128" -split-input-file | FileCheck %s + +// Specific tests to check vectorization of uniform/divergent values. + +// CHECK-LABEL: @uniform_arg +// CHECK-SAME: %[[in:.*]]: memref<512xf32>, +// CHECK-SAME: %[[uniform:.*]]: f32 +func @uniform_arg(%in : memref<512xf32>, %uniform : f32) { + affine.for %i = 0 to 512 { + %ld = affine.load %in[%i] : memref<512xf32> + %add = addf %ld, %uniform : f32 + } + return +} + +// CHECK-NEXT: %[[bcast:.*]] = vector.broadcast %[[uniform]] : f32 to vector<128xf32> +// CHECK-NEXT: affine.for +// CHECK: addf %{{.*}}, %[[bcast]] : vector<128xf32> + +// ----- + +// CHECK-LABEL: @multi_use_uniform_arg +// CHECK-SAME: %[[in:.*]]: memref<512xf32> +// CHECK-SAME: %[[uniform:.*]]: f32 +func @multi_use_uniform_arg(%in : memref<512xf32>, %uniform : f32) { + affine.for %i = 0 to 512 { + %ld = affine.load %in[%i] : memref<512xf32> + %user0 = addf %ld, %uniform : f32 + %user1 = addf %ld, %uniform : f32 + } + return +} + +// CHECK-NEXT: %[[bcast:.*]] = vector.broadcast %[[uniform]] : f32 to vector<128xf32> +// CHECK-NOT: vector.broadcast +// CHECK-NEXT: affine.for +// CHECK: addf %{{.*}}, %[[bcast]] : vector<128xf32> +// CHECK: addf %{{.*}}, %[[bcast]] : vector<128xf32> + +// ----- + +// CHECK-LABEL: @uniform_load +func 
@uniform_load(%A : memref, %C : memref) { + %c0 = constant 0 : index + %N = dim %A, %c0 : memref + affine.for %i = 0 to %N { + %uniform_ld = affine.load %A[%i, %i] : memref + affine.for %j = 0 to %N { + %b = affine.load %A[%i, %j] : memref + %c = addf %uniform_ld, %b : f32 + } + } + return +} + +// CHECK: affine.for +// CHECK-NEXT: %[[uniform_ld:.*]] = affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECK-NEXT: %[[bcast:.*]] = vector.broadcast %[[uniform_ld]] : f32 to vector<128xf32> +// CHECK-NEXT: affine.for +// CHECK: addf %[[bcast]], %{{.*}} : vector<128xf32> diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir index bbeced633bcd8..66429907205e9 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir @@ -396,25 +396,6 @@ func @vec_rejected_10(%A : memref, %B : memref) { return } -// This should not vectorize and should not crash. -// CHECK-LABEL: @vec_rejected_11 -func @vec_rejected_11(%A : memref, %C : memref) { - %c0 = constant 0 : index - %N = dim %A, %c0 : memref - affine.for %i = 0 to %N { -// CHECK-NOT: vector - %a = affine.load %A[%i, %i] : memref // not vectorized - affine.for %j = 0 to %N { - %b = affine.load %A[%i, %j] : memref // may be vectorized -// CHECK-NOT: vector - %c = addf %a, %b : f32 // not vectorized because %a wasn't -// CHECK-NOT: vector - affine.store %c, %C[%i, %j] : memref // not vectorized because %c wasn't - } - } - return -} - // This should not vectorize due to the sequential dependence in the scf. 
// CHECK-LABEL: @vec_rejected_sequential func @vec_rejected_sequential(%A : memref) { From 6478caa0340768e322b82690d987c039cb78550d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 2 Sep 2020 23:57:47 +0000 Subject: [PATCH 059/465] Tweak `mlir-linalg-ods-gen` library dependency: only MLIRIR is needed, not the entire parser (NFC) --- mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt b/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt index 3736a18b20091..bc9a0c1f310a1 100644 --- a/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt +++ b/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt @@ -7,6 +7,6 @@ add_llvm_tool(mlir-linalg-ods-gen ) llvm_update_compile_flags(mlir-linalg-ods-gen) target_link_libraries(mlir-linalg-ods-gen PRIVATE - MLIRParser MLIRSupport + MLIRIR ) From 5f6ca065a55fcbd2b39e3d32b3eb3deb04aa3e97 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Wed, 2 Sep 2020 17:05:19 -0700 Subject: [PATCH 060/465] Revert de6caf871be79dc7549aebe4e4fb57d52f6ed202 and 51128b670d4f757132e927c4f3dd78d257d37f70 (https://reviews.llvm.org/D84974) The tests seem to be timing out in all linux bots. Need further analysis. Revert "run in terminal" This reverts commit de6caf871be79dc7549aebe4e4fb57d52f6ed202. 
--- .../tools/lldb-vscode/lldbvscode_testcase.py | 14 +- .../test/tools/lldb-vscode/vscode.py | 30 +--- .../tools/lldb-vscode/runInTerminal/Makefile | 3 - .../runInTerminal/TestVSCode_runInTerminal.py | 48 ----- .../tools/lldb-vscode/runInTerminal/main.c | 11 -- lldb/tools/lldb-vscode/JSONUtils.cpp | 40 ----- lldb/tools/lldb-vscode/JSONUtils.h | 12 -- lldb/tools/lldb-vscode/VSCode.cpp | 73 +------- lldb/tools/lldb-vscode/VSCode.h | 45 ----- lldb/tools/lldb-vscode/lldb-vscode.cpp | 167 ++++++++---------- lldb/tools/lldb-vscode/package.json | 5 - 11 files changed, 82 insertions(+), 366 deletions(-) delete mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile delete mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py delete mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/main.c diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py index 5710751ec34bf..fa5a9c0db1ebd 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py @@ -282,7 +282,7 @@ def launch(self, program=None, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, debuggerRoot=None, launchCommands=None, - sourceMap=None, disconnectAutomatically=True, runInTerminal=False): + sourceMap=None, disconnectAutomatically=True): '''Sending launch request to vscode ''' @@ -316,16 +316,10 @@ def cleanup(): sourcePath=sourcePath, debuggerRoot=debuggerRoot, launchCommands=launchCommands, - sourceMap=sourceMap, - runInTerminal=runInTerminal) + sourceMap=sourceMap) if not (response and response['success']): self.assertTrue(response['success'], 'launch failed (%s)' % (response['message'])) - # We need to trigger a 
request_configurationDone after we've successfully - # attached a runInTerminal process to finish initialization. - if runInTerminal: - self.vscode.request_configurationDone() - def build_and_launch(self, program, args=None, cwd=None, env=None, stopOnEntry=False, disableASLR=True, @@ -333,7 +327,7 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, - debuggerRoot=None, runInTerminal=False): + debuggerRoot=None): '''Build the default Makefile target, create the VSCode debug adaptor, and launch the process. ''' @@ -343,4 +337,4 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, self.launch(program, args, cwd, env, stopOnEntry, disableASLR, disableSTDIO, shellExpandArguments, trace, initCommands, preRunCommands, stopCommands, exitCommands, - terminateCommands, sourcePath, debuggerRoot, runInTerminal=runInTerminal) + terminateCommands, sourcePath, debuggerRoot) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index 834e33ef5c3da..6b1c1c961b545 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -300,29 +300,12 @@ def send_recv(self, command): self.send_packet(command) done = False while not done: - response_or_request = self.recv_packet(filter_type=['response', 'request']) - if response_or_request is None: + response = self.recv_packet(filter_type='response') + if response is None: desc = 'no response for "%s"' % (command['command']) raise ValueError(desc) - if response_or_request['type'] == 'response': - self.validate_response(command, response_or_request) - return response_or_request - else: - if response_or_request['command'] == 'runInTerminal': - subprocess.Popen(response_or_request['arguments']['args'], - 
env=response_or_request['arguments']['env']) - self.send_packet({ - "type": "response", - "seq": -1, - "request_seq": response_or_request['seq'], - "success": True, - "command": "runInTerminal", - "body": {} - }, set_sequence=False) - else: - desc = 'unkonwn reverse request "%s"' % (response_or_request['command']) - raise ValueError(desc) - + self.validate_response(command, response) + return response return None def wait_for_event(self, filter=None, timeout=None): @@ -616,8 +599,7 @@ def request_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None ,sourcePath=None, - debuggerRoot=None, launchCommands=None, sourceMap=None, - runInTerminal=False): + debuggerRoot=None, launchCommands=None, sourceMap=None): args_dict = { 'program': program } @@ -656,8 +638,6 @@ def request_launch(self, program, args=None, cwd=None, env=None, args_dict['launchCommands'] = launchCommands if sourceMap: args_dict['sourceMap'] = sourceMap - if runInTerminal: - args_dict['runInTerminal'] = runInTerminal command_dict = { 'command': 'launch', 'type': 'request', diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile deleted file mode 100644 index 10495940055b6..0000000000000 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -C_SOURCES := main.c - -include Makefile.rules diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py deleted file mode 100644 index b21871e9ac949..0000000000000 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Test lldb-vscode runInTerminal reverse request -""" - - -import unittest2 -import vscode -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * 
-from lldbsuite.test import lldbutil -import lldbvscode_testcase -import time -import os - - -class TestVSCode_runInTerminal(lldbvscode_testcase.VSCodeTestCaseBase): - - mydir = TestBase.compute_mydir(__file__) - - @skipIfWindows - @skipIfRemote - def test_runInTerminal(self): - ''' - Tests the "runInTerminal" reverse request. It makes sure that the IDE can - launch the inferior with the correct environment variables and arguments. - ''' - program = self.getBuildArtifact("a.out") - source = 'main.c' - self.build_and_launch(program, stopOnEntry=True, runInTerminal=True, args=["foobar"], env=["FOO=bar"]) - breakpoint_line = line_number(source, '// breakpoint') - - self.set_source_breakpoints(source, [breakpoint_line]) - self.continue_to_next_stop() - - # We verify we actually stopped inside the loop - counter = int(self.vscode.get_local_variable_value('counter')) - self.assertTrue(counter > 0) - - # We verify we were able to set the launch arguments - argc = int(self.vscode.get_local_variable_value('argc')) - self.assertEqual(argc, 2) - - argv1 = self.vscode.request_evaluate('argv[1]')['body']['result'] - self.assertIn('foobar', argv1) - - # We verify we were able to set the environment - env = self.vscode.request_evaluate('foo')['body']['result'] - self.assertIn('bar', env) diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c deleted file mode 100644 index 676bd830e657b..0000000000000 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c +++ /dev/null @@ -1,11 +0,0 @@ -#include -#include -#include - -int main(int argc, char *argv[]) { - const char *foo = getenv("FOO"); - for (int counter = 1;; counter++) { - sleep(1); // breakpoint - } - return 0; -} diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 044bfd13ec463..36156ca2c42f9 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -998,44 +998,4 @@ 
llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit) { return llvm::json::Value(std::move(object)); } -/// See -/// https://microsoft.github.io/debug-adapter-protocol/specification#Reverse_Requests_RunInTerminal -llvm::json::Object -CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request) { - llvm::json::Object reverse_request; - reverse_request.try_emplace("type", "request"); - reverse_request.try_emplace("command", "runInTerminal"); - - llvm::json::Object run_in_terminal_args; - // This indicates the IDE to open an embedded terminal, instead of opening the - // terminal in a new window. - run_in_terminal_args.try_emplace("kind", "integrated"); - - auto launch_request_arguments = launch_request.getObject("arguments"); - std::vector args = GetStrings(launch_request_arguments, "args"); - // The program path must be the first entry in the "args" field - args.insert(args.begin(), - GetString(launch_request_arguments, "program").str()); - run_in_terminal_args.try_emplace("args", args); - - const auto cwd = GetString(launch_request_arguments, "cwd"); - if (!cwd.empty()) - run_in_terminal_args.try_emplace("cwd", cwd); - - // We need to convert the input list of environments variables into a - // dictionary - std::vector envs = GetStrings(launch_request_arguments, "env"); - llvm::json::Object environment; - for (const std::string &env : envs) { - size_t index = env.find("="); - environment.try_emplace(env.substr(0, index), env.substr(index + 1)); - } - run_in_terminal_args.try_emplace("env", - llvm::json::Value(std::move(environment))); - - reverse_request.try_emplace( - "arguments", llvm::json::Value(std::move(run_in_terminal_args))); - return reverse_request; -} - } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h index 88cbef9e5fdd4..df4428f390ba2 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.h +++ b/lldb/tools/lldb-vscode/JSONUtils.h @@ -443,18 +443,6 @@ llvm::json::Value 
CreateVariable(lldb::SBValue v, int64_t variablesReference, llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit); -/// Create a runInTerminal reverse request object -/// -/// \param[in] launch_request -/// The original launch_request object whose fields are used to construct -/// the reverse request object. -/// -/// \return -/// A "runInTerminal" JSON object that follows the specification outlined by -/// Microsoft. -llvm::json::Object -CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request); - } // namespace lldb_vscode #endif diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index 2022f8319534e..537cae7868631 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -38,8 +38,7 @@ VSCode::VSCode() {"swift_catch", "Swift Catch", lldb::eLanguageTypeSwift}, {"swift_throw", "Swift Throw", lldb::eLanguageTypeSwift}}), focus_tid(LLDB_INVALID_THREAD_ID), sent_terminated_event(false), - stop_at_entry(false), is_attach(false), - waiting_for_run_in_terminal(false), reverse_request_seq(0) { + stop_at_entry(false), is_attach(false) { const char *log_file_path = getenv("LLDBVSCODE_LOG"); #if defined(_WIN32) // Windows opens stdout and stdin in text mode which converts \n to 13,10 @@ -363,74 +362,4 @@ void VSCode::SetTarget(const lldb::SBTarget target) { } } -PacketStatus VSCode::GetObject(llvm::json::Object &object) { - std::string json = ReadJSON(); - if (json.empty()) - return PacketStatus::EndOfFile; - - llvm::StringRef json_sref(json); - llvm::Expected json_value = llvm::json::parse(json_sref); - if (!json_value) { - auto error = json_value.takeError(); - if (log) { - std::string error_str; - llvm::raw_string_ostream strm(error_str); - strm << error; - strm.flush(); - *log << "error: failed to parse JSON: " << error_str << std::endl - << json << std::endl; - } - return PacketStatus::JSONMalformed; - } - object = *json_value->getAsObject(); - if (!json_value->getAsObject()) { - 
if (log) - *log << "error: json packet isn't a object" << std::endl; - return PacketStatus::JSONNotObject; - } - return PacketStatus::Success; -} - -bool VSCode::HandleObject(const llvm::json::Object &object) { - const auto packet_type = GetString(object, "type"); - if (packet_type == "request") { - const auto command = GetString(object, "command"); - auto handler_pos = request_handlers.find(std::string(command)); - if (handler_pos != request_handlers.end()) { - handler_pos->second(object); - return true; // Success - } else { - if (log) - *log << "error: unhandled command \"" << command.data() << std::endl; - return false; // Fail - } - } - return false; -} - -PacketStatus VSCode::SendReverseRequest(llvm::json::Object request, - llvm::json::Object &response) { - request.try_emplace("seq", ++reverse_request_seq); - SendJSON(llvm::json::Value(std::move(request))); - bool got_response = false; - while (!got_response) { - PacketStatus status = GetObject(response); - const auto packet_type = GetString(response, "type"); - if (packet_type == "response") { - if (status == PacketStatus::Success) { - return status; - // Not our response, we got another packet - HandleObject(response); - } else { - return status; - } - } - } -} - -void VSCode::RegisterRequestCallback(std::string request, - RequestCallback callback) { - request_handlers[request] = callback; -} - } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h index 4a20c56c53eb0..88a0c08de2454 100644 --- a/lldb/tools/lldb-vscode/VSCode.h +++ b/lldb/tools/lldb-vscode/VSCode.h @@ -9,7 +9,6 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H #define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H -#include #include #include #include @@ -20,7 +19,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" #include "lldb/API/SBAttachInfo.h" @@ -67,15 +65,6 @@ enum class OutputType { 
Console, Stdout, Stderr, Telemetry }; enum VSCodeBroadcasterBits { eBroadcastBitStopEventThread = 1u << 0 }; -typedef void (*RequestCallback)(const llvm::json::Object &command); - -enum class PacketStatus { - Success = 0, - EndOfFile, - JSONMalformed, - JSONNotObject -}; - struct VSCode { InputStream input; OutputStream output; @@ -102,10 +91,6 @@ struct VSCode { bool sent_terminated_event; bool stop_at_entry; bool is_attach; - uint32_t reverse_request_seq; - std::map request_handlers; - std::condition_variable request_in_terminal_cv; - bool waiting_for_run_in_terminal; // Keep track of the last stop thread index IDs as threads won't go away // unless we send a "thread" event to indicate the thread exited. llvm::DenseSet thread_ids; @@ -167,36 +152,6 @@ struct VSCode { /// Set given target object as a current target for lldb-vscode and start /// listeing for its breakpoint events. void SetTarget(const lldb::SBTarget target); - - const std::map &GetRequestHandlers(); - - PacketStatus GetObject(llvm::json::Object &object); - bool HandleObject(const llvm::json::Object &object); - - /// Send a Debug Adapter Protocol reverse request to the IDE - /// - /// \param[in] request - /// The payload of the request to send. - /// - /// \param[out] response - /// The response of the IDE. It might be undefined if there was an error. - /// - /// \return - /// A \a PacketStatus object indicating the sucess or failure of the - /// request. - PacketStatus SendReverseRequest(llvm::json::Object request, - llvm::json::Object &response); - - /// Registers a callback handler for a Debug Adapter Protocol request - /// - /// \param[in] request - /// The name of the request following the Debug Adapter Protocol - /// specification. - /// - /// \param[in] callback - /// The callback to execute when the given request is triggered by the - /// IDE. 
- void RegisterRequestCallback(std::string request, RequestCallback callback); }; extern VSCode g_vsc; diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index ee01822ba6217..54f2e653d0697 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -384,12 +384,7 @@ void EventThreadFunction() { break; case lldb::eStateSuspended: break; - case lldb::eStateStopped: { - if (g_vsc.waiting_for_run_in_terminal) { - g_vsc.waiting_for_run_in_terminal = false; - g_vsc.request_in_terminal_cv.notify_one(); - } - } + case lldb::eStateStopped: // Only report a stopped event if the process was not restarted. if (!lldb::SBProcess::GetRestartedFromEvent(event)) { SendStdOutStdErr(process); @@ -1379,9 +1374,6 @@ void request_initialize(const llvm::json::Object &request) { filters.emplace_back(CreateExceptionBreakpointFilter(exc_bp)); } body.try_emplace("exceptionBreakpointFilters", std::move(filters)); - // The debug adapter supports launching a debugee in intergrated VSCode - // terminal. - body.try_emplace("supportsRunInTerminalRequest", true); // The debug adapter supports stepping back via the stepBack and // reverseContinue requests. body.try_emplace("supportsStepBack", false); @@ -1441,49 +1433,6 @@ void request_initialize(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } -void request_runInTerminal(const llvm::json::Object &launch_request, - llvm::json::Object &launch_response) { - // We have already created a target that has a valid "program" path to the - // executable. We will attach to the next process whose name matches that - // of the target's. 
- g_vsc.is_attach = true; - lldb::SBAttachInfo attach_info; - lldb::SBError error; - attach_info.SetWaitForLaunch(true, /*async*/ true); - g_vsc.target.Attach(attach_info, error); - - llvm::json::Object reverse_request = - CreateRunInTerminalReverseRequest(launch_request); - llvm::json::Object reverse_response; - lldb_vscode::PacketStatus status = - g_vsc.SendReverseRequest(reverse_request, reverse_response); - if (status != lldb_vscode::PacketStatus::Success) - error.SetErrorString("Process cannot be launched by IDE."); - - if (error.Success()) { - // Wait for the attach stop event to happen or for a timeout. - g_vsc.waiting_for_run_in_terminal = true; - static std::mutex mutex; - std::unique_lock locker(mutex); - g_vsc.request_in_terminal_cv.wait_for(locker, std::chrono::seconds(10)); - - auto attached_pid = g_vsc.target.GetProcess().GetProcessID(); - if (attached_pid == LLDB_INVALID_PROCESS_ID) - error.SetErrorString("Failed to attach to a process"); - else - SendProcessEvent(Attach); - } - - if (error.Fail()) { - launch_response["success"] = llvm::json::Value(false); - EmplaceSafeString(launch_response, "message", - std::string(error.GetCString())); - } else { - launch_response["success"] = llvm::json::Value(true); - g_vsc.SendJSON(CreateEventObject("initialized")); - } -} - // "LaunchRequest": { // "allOf": [ { "$ref": "#/definitions/Request" }, { // "type": "object", @@ -1556,12 +1505,6 @@ void request_launch(const llvm::json::Object &request) { return; } - if (GetBoolean(arguments, "runInTerminal", false)) { - request_runInTerminal(request, response); - g_vsc.SendJSON(llvm::json::Value(std::move(response))); - return; - } - // Instantiate a launch info instance for the target. 
auto launch_info = g_vsc.target.GetLaunchInfo(); @@ -2888,35 +2831,39 @@ void request__testGetTargetBreakpoints(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } -void RegisterRequestCallbacks() { - g_vsc.RegisterRequestCallback("attach", request_attach); - g_vsc.RegisterRequestCallback("completions", request_completions); - g_vsc.RegisterRequestCallback("continue", request_continue); - g_vsc.RegisterRequestCallback("configurationDone", request_configurationDone); - g_vsc.RegisterRequestCallback("disconnect", request_disconnect); - g_vsc.RegisterRequestCallback("evaluate", request_evaluate); - g_vsc.RegisterRequestCallback("exceptionInfo", request_exceptionInfo); - g_vsc.RegisterRequestCallback("getCompileUnits", request_getCompileUnits); - g_vsc.RegisterRequestCallback("initialize", request_initialize); - g_vsc.RegisterRequestCallback("launch", request_launch); - g_vsc.RegisterRequestCallback("next", request_next); - g_vsc.RegisterRequestCallback("pause", request_pause); - g_vsc.RegisterRequestCallback("scopes", request_scopes); - g_vsc.RegisterRequestCallback("setBreakpoints", request_setBreakpoints); - g_vsc.RegisterRequestCallback("setExceptionBreakpoints", - request_setExceptionBreakpoints); - g_vsc.RegisterRequestCallback("setFunctionBreakpoints", - request_setFunctionBreakpoints); - g_vsc.RegisterRequestCallback("setVariable", request_setVariable); - g_vsc.RegisterRequestCallback("source", request_source); - g_vsc.RegisterRequestCallback("stackTrace", request_stackTrace); - g_vsc.RegisterRequestCallback("stepIn", request_stepIn); - g_vsc.RegisterRequestCallback("stepOut", request_stepOut); - g_vsc.RegisterRequestCallback("threads", request_threads); - g_vsc.RegisterRequestCallback("variables", request_variables); - // Testing requests - g_vsc.RegisterRequestCallback("_testGetTargetBreakpoints", - request__testGetTargetBreakpoints); +const std::map &GetRequestHandlers() { +#define REQUEST_CALLBACK(name) \ + { 
#name, request_##name } + static std::map g_request_handlers = { + // VSCode Debug Adaptor requests + REQUEST_CALLBACK(attach), + REQUEST_CALLBACK(completions), + REQUEST_CALLBACK(continue), + REQUEST_CALLBACK(configurationDone), + REQUEST_CALLBACK(disconnect), + REQUEST_CALLBACK(evaluate), + REQUEST_CALLBACK(exceptionInfo), + REQUEST_CALLBACK(getCompileUnits), + REQUEST_CALLBACK(initialize), + REQUEST_CALLBACK(launch), + REQUEST_CALLBACK(next), + REQUEST_CALLBACK(pause), + REQUEST_CALLBACK(scopes), + REQUEST_CALLBACK(setBreakpoints), + REQUEST_CALLBACK(setExceptionBreakpoints), + REQUEST_CALLBACK(setFunctionBreakpoints), + REQUEST_CALLBACK(setVariable), + REQUEST_CALLBACK(source), + REQUEST_CALLBACK(stackTrace), + REQUEST_CALLBACK(stepIn), + REQUEST_CALLBACK(stepOut), + REQUEST_CALLBACK(threads), + REQUEST_CALLBACK(variables), + // Testing requests + REQUEST_CALLBACK(_testGetTargetBreakpoints), + }; +#undef REQUEST_CALLBACK + return g_request_handlers; } } // anonymous namespace @@ -2948,8 +2895,6 @@ int main(int argc, char *argv[]) { // Initialize LLDB first before we do anything. 
lldb::SBDebugger::Initialize(); - RegisterRequestCallbacks(); - int portno = -1; LLDBVSCodeOptTable T; @@ -2992,17 +2937,49 @@ int main(int argc, char *argv[]) { g_vsc.output.descriptor = StreamDescriptor::from_file(fileno(stdout), false); } + auto request_handlers = GetRequestHandlers(); uint32_t packet_idx = 0; while (!g_vsc.sent_terminated_event) { - llvm::json::Object object; - lldb_vscode::PacketStatus status = g_vsc.GetObject(object); - if (status == lldb_vscode::PacketStatus::EndOfFile) + std::string json = g_vsc.ReadJSON(); + if (json.empty()) break; - if (status != lldb_vscode::PacketStatus::Success) - return 1; // Fatal error - if (!g_vsc.HandleObject(object)) + llvm::StringRef json_sref(json); + llvm::Expected json_value = llvm::json::parse(json_sref); + if (!json_value) { + auto error = json_value.takeError(); + if (g_vsc.log) { + std::string error_str; + llvm::raw_string_ostream strm(error_str); + strm << error; + strm.flush(); + + *g_vsc.log << "error: failed to parse JSON: " << error_str << std::endl + << json << std::endl; + } + return 1; + } + + auto object = json_value->getAsObject(); + if (!object) { + if (g_vsc.log) + *g_vsc.log << "error: json packet isn't a object" << std::endl; return 1; + } + + const auto packet_type = GetString(object, "type"); + if (packet_type == "request") { + const auto command = GetString(object, "command"); + auto handler_pos = request_handlers.find(std::string(command)); + if (handler_pos != request_handlers.end()) { + handler_pos->second(*object); + } else { + if (g_vsc.log) + *g_vsc.log << "error: unhandled command \"" << command.data() + << std::endl; + return 1; + } + } ++packet_idx; } diff --git a/lldb/tools/lldb-vscode/package.json b/lldb/tools/lldb-vscode/package.json index 9077ab51dd7fa..29ca06dd17d63 100644 --- a/lldb/tools/lldb-vscode/package.json +++ b/lldb/tools/lldb-vscode/package.json @@ -175,11 +175,6 @@ "type": "array", "description": "Commands executed at the end of debugging session.", "default": [] 
- }, - "runInTerminal": { - "type": "boolean", - "description": "Launch the program inside an integrated terminal in the IDE. Useful for debugging interactive command line programs", - "default": false } } }, From 232448238383866d0181f540426a6b6b9e12197d Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 2 Sep 2020 17:34:35 -0700 Subject: [PATCH 061/465] [asan_symbolize] Remove --use-symbol-table=true which is the default --- compiler-rt/lib/asan/scripts/asan_symbolize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py index a2e38238a97c6..1e06e0d56fb94 100755 --- a/compiler-rt/lib/asan/scripts/asan_symbolize.py +++ b/compiler-rt/lib/asan/scripts/asan_symbolize.py @@ -89,7 +89,6 @@ def __init__(self, symbolizer_path, default_arch, system, dsym_hints=[]): def open_llvm_symbolizer(self): cmd = [self.symbolizer_path, - '--use-symbol-table=true', '--demangle=%s' % demangle, '--functions=linkage', '--inlines', From 67ce11405b08609afb35e218ed7d28ef5e16a55f Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Thu, 3 Sep 2020 08:49:51 +0800 Subject: [PATCH 062/465] [llvm-dwarfdump] Warn user when it encounters no null terminated strings. When llvm-dwarfdump encounters no null terminated strings, we should warn user about it rather than ignore it and print nothing. 
Before this patch, when llvm-dwarfdump dumps a .debug_str section whose content is "abc", it prints: ``` .debug_str contents: ``` After this patch: ``` .debug_str contents: warning: no null terminated string at offset 0x0 ``` Reviewed By: jhenderson, MaskRay Differential Revision: https://reviews.llvm.org/D86998 --- llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 8 +++++++- llvm/test/tools/llvm-dwarfdump/debug-str.yaml | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 88f118bb05e3e..d31c358798211 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -530,7 +530,13 @@ void DWARFContext::dump( DataExtractor StrData(Section, isLittleEndian(), 0); uint64_t Offset = 0; uint64_t StrOffset = 0; - while (const char *CStr = StrData.getCStr(&Offset)) { + while (StrData.isValidOffset(Offset)) { + Error Err = Error::success(); + const char *CStr = StrData.getCStr(&Offset, &Err); + if (Err) { + DumpOpts.WarningHandler(std::move(Err)); + return; + } OS << format("0x%8.8" PRIx64 ": \"", StrOffset); OS.write_escaped(CStr); OS << "\"\n"; diff --git a/llvm/test/tools/llvm-dwarfdump/debug-str.yaml b/llvm/test/tools/llvm-dwarfdump/debug-str.yaml index 36729c1823107..0f8cf2f199029 100644 --- a/llvm/test/tools/llvm-dwarfdump/debug-str.yaml +++ b/llvm/test/tools/llvm-dwarfdump/debug-str.yaml @@ -44,3 +44,16 @@ Sections: # ESCAPED-NEXT: 0x00000002: "\001" # ESCAPED-NEXT: 0x00000004: "\\001" # ESCAPED-EMPTY: + +## c) Test that llvm-dwarfdump emits a warning when it encounters a string without a null terminator. 
+ +## "abc\0" "abc" +# RUN: yaml2obj -DCONTENT="61626300616263" %s -o %t3.o +# RUN: llvm-dwarfdump --debug-str %t3.o 2>&1 | FileCheck %s --check-prefix=WARN + +# WARN: .debug_str contents: +# WARN-NEXT: 0x00000000: "abc" +# WARN-NEXT: warning: no null terminated string at offset 0x4 +# WARN: .debug_str.dwo contents: +# WARN-NEXT: 0x00000000: "abc" +# WARN-NEXT: warning: no null terminated string at offset 0x4 From 96ef6998dfcc4d7f2c7fcb87eb9369e94a40e127 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 1 Sep 2020 16:20:18 -0700 Subject: [PATCH 063/465] [InstCombine] Fix a couple crashes with extractelement on a scalable vector. Differential Revision: https://reviews.llvm.org/D86989 --- llvm/lib/Analysis/ValueTracking.cpp | 12 +++++----- .../InstCombine/InstCombineCasts.cpp | 4 +--- .../InstCombine/vscale_extractelement.ll | 22 +++++++++++++++++++ 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index b592412ed0b6f..3f7cf296ac1c5 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2568,11 +2568,13 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, const Value *Vec = EEI->getVectorOperand(); const Value *Idx = EEI->getIndexOperand(); auto *CIdx = dyn_cast(Idx); - unsigned NumElts = cast(Vec->getType())->getNumElements(); - APInt DemandedVecElts = APInt::getAllOnesValue(NumElts); - if (CIdx && CIdx->getValue().ult(NumElts)) - DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); - return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); + if (auto *VecTy = dyn_cast(Vec->getType())) { + unsigned NumElts = VecTy->getNumElements(); + APInt DemandedVecElts = APInt::getAllOnesValue(NumElts); + if (CIdx && CIdx->getValue().ult(NumElts)) + DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); + return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); + } } KnownBits Known(BitWidth); 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 7cbbcddc14bae..35db212457546 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2224,9 +2224,7 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, if (!VectorType::isValidElementType(DestType)) return nullptr; - unsigned NumElts = - cast(ExtElt->getVectorOperandType())->getNumElements(); - auto *NewVecType = FixedVectorType::get(DestType, NumElts); + auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType()); auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(), NewVecType, "bc"); return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); diff --git a/llvm/test/Transforms/InstCombine/vscale_extractelement.ll b/llvm/test/Transforms/InstCombine/vscale_extractelement.ll index 8dcb167408698..0b148172a42ae 100644 --- a/llvm/test/Transforms/InstCombine/vscale_extractelement.ll +++ b/llvm/test/Transforms/InstCombine/vscale_extractelement.ll @@ -146,3 +146,25 @@ define @extractelement_insertelement_diff_positions( %3, i32 %vec.e3, i32 3 ret %4 } + +define i32 @bitcast_of_extractelement( %d) { +; CHECK-LABEL: @bitcast_of_extractelement( +; CHECK-NEXT: [[BC:%.*]] = bitcast [[D:%.*]] to +; CHECK-NEXT: [[CAST:%.*]] = extractelement [[BC]], i32 0 +; CHECK-NEXT: ret i32 [[CAST]] +; + %ext = extractelement %d, i32 0 + %cast = bitcast float %ext to i32 + ret i32 %cast +} + +define i1 @extractelement_is_zero( %d, i1 %b, i32 %z) { +; CHECK-LABEL: @extractelement_is_zero( +; CHECK-NEXT: [[EXT:%.*]] = extractelement [[D:%.*]], i32 0 +; CHECK-NEXT: [[BB:%.*]] = icmp eq i32 [[EXT]], 0 +; CHECK-NEXT: ret i1 [[BB]] +; + %ext = extractelement %d, i32 0 + %bb = icmp eq i32 %ext, 0 + ret i1 %bb +} From 0cefaec2e876182313826bd13d5b9f8f3623622f Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Wed, 2 Sep 2020 20:41:15 +0000 Subject: 
[PATCH 064/465] [compiler-rt] Don't build llvm-lit in RUNTIMES-BUILD With runtimes-build, we lost map config for llvm-lit, so we can NOT run lit from source dir anymore. All the subdir target like: ninja check-llvm-codegen-powerpc will fail too. We will get some cfg error like: File "../lvm-project/llvm/test/lit.cfg.py", line 21, in config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) AttributeError: 'NoneType' object has no attribute 'use_lit_shell' This is reset of map config in llvm-lit rebuild. We already have llvm-lit in runtimes-build, so don't build it. Reviewed By: phosek Differential Revision: https://reviews.llvm.org/D87002 --- compiler-rt/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index c7e86946bcf35..0a0294f937dba 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -544,7 +544,8 @@ add_subdirectory(lib) if(COMPILER_RT_INCLUDE_TESTS) add_subdirectory(unittests) add_subdirectory(test) - if (COMPILER_RT_STANDALONE_BUILD) + # Don't build llvm-lit for runtimes-build, it will clean up map_config. + if (COMPILER_RT_STANDALONE_BUILD AND NOT RUNTIMES_BUILD) # If we have a valid source tree, generate llvm-lit into the bin directory. # The user can still choose to have the check targets *use* a different lit # by specifying -DLLVM_EXTERNAL_LIT, but we generate it regardless. From 198a13e970db2e88d0eaa62f69778388065edf70 Mon Sep 17 00:00:00 2001 From: Sunil Srivastava Date: Wed, 2 Sep 2020 19:12:24 -0700 Subject: [PATCH 065/465] Fix for PR46384. Failure on weak dllimport. 
Differential Revision: https://reviews.llvm.org/D86907 --- llvm/lib/IR/Verifier.cpp | 3 ++- llvm/test/Verifier/weak-dllimport.ll | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Verifier/weak-dllimport.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 89fc0d073749c..6cae21e3cfe1a 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -589,7 +589,8 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) { Assert(!GV.isDSOLocal(), "GlobalValue with DLLImport Storage is dso_local!", &GV); - Assert((GV.isDeclaration() && GV.hasExternalLinkage()) || + Assert((GV.isDeclaration() && + (GV.hasExternalLinkage() || GV.hasExternalWeakLinkage())) || GV.hasAvailableExternallyLinkage(), "Global is marked as dllimport, but not external", &GV); } diff --git a/llvm/test/Verifier/weak-dllimport.ll b/llvm/test/Verifier/weak-dllimport.ll new file mode 100644 index 0000000000000..c7b6ed977b100 --- /dev/null +++ b/llvm/test/Verifier/weak-dllimport.ll @@ -0,0 +1,28 @@ +; RUN: opt -verify < %s 2>&1 | FileCheck %s +; CHECK-NOT: Global is marked as dllimport, but not external + +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.11.0" + +@"?var_hook@@3HA" = extern_weak dllimport global i32, align 4 + +; Function Attrs: noinline optnone uwtable +define dso_local zeroext i1 @"?foo@@YA_NPEAHH@Z"(i32* %0, i32 %1) #0 { + ret i1 0 +} + +declare extern_weak dllimport void @func_hook(i32) #1 + +attributes #0 = { noinline optnone uwtable } +attributes #1 = { uwtable } + +; Compiled from the following C++ example with --target=x86_64-pc-win32, +; using the non-checking configuration +;__declspec(dllimport) __attribute__((weak)) extern "C" void func_hook(int); +;extern __declspec(dllimport) __attribute__((weak)) int var_hook; +;bool foo(int *q, int p) +;{ +; if (func_hook) +; func_hook(p); +; return &var_hook == 
q; +;} From f434f7dabc1679d5218db359265f02a4eda6a94c Mon Sep 17 00:00:00 2001 From: Greg McGary Date: Wed, 2 Sep 2020 19:32:12 -0700 Subject: [PATCH 066/465] Add code owners of new MachO port --- lld/CODE_OWNERS.TXT | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lld/CODE_OWNERS.TXT b/lld/CODE_OWNERS.TXT index f019a87553aa0..44972c0d345a8 100644 --- a/lld/CODE_OWNERS.TXT +++ b/lld/CODE_OWNERS.TXT @@ -15,8 +15,12 @@ D: COFF, ELF backends (COFF/* ELF/*) N: Lang Hames, Nick Kledzik E: lhames@gmail.com, kledzik@apple.com -D: Mach-O backend +D: Old Mach-O backend N: Sam Clegg E: sbc@chromium.org D: WebAssembly backend (wasm/*) + +N: Jez Ng, Greg McGary, Shoaib Meenai +E: jezng@fb.com, gkm@fb.com, smeenai@fb.com +D: New Mach-O backend From f7e04b710d2d90ae913e737550f7eec03d2c9f37 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Wed, 2 Sep 2020 19:42:54 -0700 Subject: [PATCH 067/465] [lldb/Gui] zero-initialize children_stop_id This is currently causing msan warnings in the API tests when run under msan, e.g. `commands/gui/basic/TestGuiBasic.py`. Reviewed By: clayborg Differential Revision: https://reviews.llvm.org/D86825 --- lldb/source/Core/IOHandlerCursesGUI.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index 10aff7a6c2175..19066e6be6232 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -1497,19 +1497,18 @@ struct Row { ValueObjectManager value; Row *parent; // The process stop ID when the children were calculated. 
- uint32_t children_stop_id; - int row_idx; - int x; - int y; + uint32_t children_stop_id = 0; + int row_idx = 0; + int x = 1; + int y = 1; bool might_have_children; - bool expanded; - bool calculated_children; + bool expanded = false; + bool calculated_children = false; std::vector children; Row(const ValueObjectSP &v, Row *p) - : value(v, lldb::eDynamicDontRunTarget, true), parent(p), row_idx(0), - x(1), y(1), might_have_children(v ? v->MightHaveChildren() : false), - expanded(false), calculated_children(false), children() {} + : value(v, lldb::eDynamicDontRunTarget, true), parent(p), + might_have_children(v ? v->MightHaveChildren() : false) {} size_t GetDepth() const { if (parent) From d9b4245f56a98d8ea72d6f75d5bdd5c7c8e5c88c Mon Sep 17 00:00:00 2001 From: Artur Bialas Date: Wed, 2 Sep 2020 19:52:29 -0700 Subject: [PATCH 068/465] [mlir][spirv] Add block read and write from SPV_INTEL_subgroups Added support to OpSubgroupBlockReadINTEL and OpSubgroupBlockWriteINTEL Differential Revision: https://reviews.llvm.org/D86876 --- mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td | 5 +- .../mlir/Dialect/SPIRV/SPIRVGroupOps.td | 101 +++++++++++++++++- mlir/lib/Dialect/SPIRV/SPIRVOps.cpp | 100 +++++++++++++++++ .../SPIRV/Serialization/group-ops.mlir | 24 +++++ mlir/test/Dialect/SPIRV/group-ops.mlir | 40 +++++++ 5 files changed, 268 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td index ab0b761613425..6458183bdeb2d 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td @@ -3252,6 +3252,8 @@ def SPV_OC_OpCooperativeMatrixLoadNV : I32EnumAttrCase<"OpCooperativeMatrixLoa def SPV_OC_OpCooperativeMatrixStoreNV : I32EnumAttrCase<"OpCooperativeMatrixStoreNV", 5360>; def SPV_OC_OpCooperativeMatrixMulAddNV : I32EnumAttrCase<"OpCooperativeMatrixMulAddNV", 5361>; def SPV_OC_OpCooperativeMatrixLengthNV : I32EnumAttrCase<"OpCooperativeMatrixLengthNV", 
5362>; +def SPV_OC_OpSubgroupBlockReadINTEL : I32EnumAttrCase<"OpSubgroupBlockReadINTEL", 5575>; +def SPV_OC_OpSubgroupBlockWriteINTEL : I32EnumAttrCase<"OpSubgroupBlockWriteINTEL", 5576>; def SPV_OpcodeAttr : SPV_I32EnumAttr<"Opcode", "valid SPIR-V instructions", [ @@ -3308,7 +3310,8 @@ def SPV_OpcodeAttr : SPV_OC_OpGroupNonUniformFMax, SPV_OC_OpSubgroupBallotKHR, SPV_OC_OpTypeCooperativeMatrixNV, SPV_OC_OpCooperativeMatrixLoadNV, SPV_OC_OpCooperativeMatrixStoreNV, SPV_OC_OpCooperativeMatrixMulAddNV, - SPV_OC_OpCooperativeMatrixLengthNV + SPV_OC_OpCooperativeMatrixLengthNV, SPV_OC_OpSubgroupBlockReadINTEL, + SPV_OC_OpSubgroupBlockWriteINTEL ]>; // End opcode section. Generated from SPIR-V spec; DO NOT MODIFY! diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVGroupOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVGroupOps.td index c9ce8be9927f1..7eab3b44601ed 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVGroupOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVGroupOps.td @@ -88,7 +88,6 @@ def SPV_GroupBroadcastOp : SPV_Op<"GroupBroadcast", let assemblyFormat = [{ $execution_scope operands attr-dict `:` type($value) `,` type($localid) }]; - } // ----- @@ -147,4 +146,104 @@ def SPV_SubgroupBallotKHROp : SPV_Op<"SubgroupBallotKHR", []> { // ----- +def SPV_SubgroupBlockReadINTELOp : SPV_Op<"SubgroupBlockReadINTEL", []> { + let summary = "See extension SPV_INTEL_subgroups"; + + let description = [{ + Reads one or more components of Result data for each invocation in the + subgroup from the specified Ptr as a block operation. + + The data is read strided, so the first value read is: + Ptr[ SubgroupLocalInvocationId ] + + and the second value read is: + Ptr[ SubgroupLocalInvocationId + SubgroupMaxSize ] + etc. + + Result Type may be a scalar or vector type, and its component type must be + equal to the type pointed to by Ptr. + + The type of Ptr must be a pointer type, and must point to a scalar type. 
+ + + + ``` + subgroup-block-read-INTEL-op ::= ssa-id `=` `spv.SubgroupBlockReadINTEL` + storage-class ssa_use `:` spirv-element-type + ```mlir + + #### Example: + + ``` + %0 = spv.SubgroupBlockReadINTEL "StorageBuffer" %ptr : i32 + ``` + }]; + + let availability = [ + MinVersion, + MaxVersion, + Extension<[SPV_INTEL_subgroups]>, + Capability<[SPV_C_SubgroupBufferBlockIOINTEL]> + ]; + + let arguments = (ins + SPV_AnyPtr:$ptr + ); + + let results = (outs + SPV_Type:$value + ); +} + +// ----- + +def SPV_SubgroupBlockWriteINTELOp : SPV_Op<"SubgroupBlockWriteINTEL", []> { + let summary = "See extension SPV_INTEL_subgroups"; + + let description = [{ + Writes one or more components of Data for each invocation in the subgroup + from the specified Ptr as a block operation. + + The data is written strided, so the first value is written to: + Ptr[ SubgroupLocalInvocationId ] + + and the second value written is: + Ptr[ SubgroupLocalInvocationId + SubgroupMaxSize ] + etc. + + The type of Ptr must be a pointer type, and must point to a scalar type. + + The component type of Data must be equal to the type pointed to by Ptr. 
+ + + + ``` + subgroup-block-write-INTEL-op ::= ssa-id `=` `spv.SubgroupBlockWriteINTEL` + storage-class ssa_use `,` ssa-use `:` spirv-element-type + ```mlir + + #### Example: + + ``` + spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : i32 + ``` + }]; + + let availability = [ + MinVersion, + MaxVersion, + Extension<[SPV_INTEL_subgroups]>, + Capability<[SPV_C_SubgroupBufferBlockIOINTEL]> + ]; + + let arguments = (ins + SPV_AnyPtr:$ptr, + SPV_Type:$value + ); + + let results = (outs); +} + +// ----- + #endif // SPIRV_GROUP_OPS diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index f729752e02a00..339f588541f6e 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -468,6 +468,19 @@ static LogicalResult verifyLoadStorePtrAndValTypes(LoadStoreOpTy op, Value ptr, return success(); } +template +static LogicalResult verifyBlockReadWritePtrAndValTypes(BlockReadWriteOpTy op, + Value ptr, Value val) { + auto valType = val.getType(); + if (auto valVecTy = valType.dyn_cast()) + valType = valVecTy.getElementType(); + + if (valType != ptr.getType().cast().getPointeeType()) { + return op.emitOpError("mismatch in result type and pointer type"); + } + return success(); +} + static ParseResult parseVariableDecorations(OpAsmParser &parser, OperationState &state) { auto builtInName = llvm::convertToSnakeFromCamelCase( @@ -2025,6 +2038,93 @@ static LogicalResult verify(spirv::GroupNonUniformBallotOp ballotOp) { return success(); } +//===----------------------------------------------------------------------===// +// spv.SubgroupBlockReadINTEL +//===----------------------------------------------------------------------===// + +static ParseResult parseSubgroupBlockReadINTELOp(OpAsmParser &parser, + OperationState &state) { + // Parse the storage class specification + spirv::StorageClass storageClass; + OpAsmParser::OperandType ptrInfo; + Type elementType; + if (parseEnumStrAttr(storageClass, parser) || 
parser.parseOperand(ptrInfo) || + parser.parseColon() || parser.parseType(elementType)) { + return failure(); + } + + auto ptrType = spirv::PointerType::get(elementType, storageClass); + if (auto valVecTy = elementType.dyn_cast()) + ptrType = spirv::PointerType::get(valVecTy.getElementType(), storageClass); + + if (parser.resolveOperand(ptrInfo, ptrType, state.operands)) { + return failure(); + } + + state.addTypes(elementType); + return success(); +} + +static void print(spirv::SubgroupBlockReadINTELOp blockReadOp, + OpAsmPrinter &printer) { + SmallVector elidedAttrs; + printer << spirv::SubgroupBlockReadINTELOp::getOperationName() << " " + << blockReadOp.ptr(); + printer << " : " << blockReadOp.getType(); +} + +static LogicalResult verify(spirv::SubgroupBlockReadINTELOp blockReadOp) { + if (failed(verifyBlockReadWritePtrAndValTypes(blockReadOp, blockReadOp.ptr(), + blockReadOp.value()))) + return failure(); + + return success(); +} + +//===----------------------------------------------------------------------===// +// spv.SubgroupBlockWriteINTEL +//===----------------------------------------------------------------------===// + +static ParseResult parseSubgroupBlockWriteINTELOp(OpAsmParser &parser, + OperationState &state) { + // Parse the storage class specification + spirv::StorageClass storageClass; + SmallVector operandInfo; + auto loc = parser.getCurrentLocation(); + Type elementType; + if (parseEnumStrAttr(storageClass, parser) || + parser.parseOperandList(operandInfo, 2) || parser.parseColon() || + parser.parseType(elementType)) { + return failure(); + } + + auto ptrType = spirv::PointerType::get(elementType, storageClass); + if (auto valVecTy = elementType.dyn_cast()) + ptrType = spirv::PointerType::get(valVecTy.getElementType(), storageClass); + + if (parser.resolveOperands(operandInfo, {ptrType, elementType}, loc, + state.operands)) { + return failure(); + } + return success(); +} + +static void print(spirv::SubgroupBlockWriteINTELOp blockWriteOp, + 
OpAsmPrinter &printer) { + SmallVector elidedAttrs; + printer << spirv::SubgroupBlockWriteINTELOp::getOperationName() << " " + << blockWriteOp.ptr() << ", " << blockWriteOp.value(); + printer << " : " << blockWriteOp.value().getType(); +} + +static LogicalResult verify(spirv::SubgroupBlockWriteINTELOp blockWriteOp) { + if (failed(verifyBlockReadWritePtrAndValTypes( + blockWriteOp, blockWriteOp.ptr(), blockWriteOp.value()))) + return failure(); + + return success(); +} + //===----------------------------------------------------------------------===// // spv.GroupNonUniformElectOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SPIRV/Serialization/group-ops.mlir b/mlir/test/Dialect/SPIRV/Serialization/group-ops.mlir index 9e1e851918749..b3aaf63856a5d 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/group-ops.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/group-ops.mlir @@ -19,4 +19,28 @@ spv.module Logical GLSL450 requires #spv.vce { %0 = spv.GroupBroadcast "Workgroup" %value, %localid : f32, vector<3xi32> spv.ReturnValue %0: f32 } + // CHECK-LABEL: @subgroup_block_read_intel + spv.func @subgroup_block_read_intel(%ptr : !spv.ptr) -> i32 "None" { + // CHECK: spv.SubgroupBlockReadINTEL %{{.*}} : i32 + %0 = spv.SubgroupBlockReadINTEL "StorageBuffer" %ptr : i32 + spv.ReturnValue %0: i32 + } + // CHECK-LABEL: @subgroup_block_read_intel_vector + spv.func @subgroup_block_read_intel_vector(%ptr : !spv.ptr) -> vector<3xi32> "None" { + // CHECK: spv.SubgroupBlockReadINTEL %{{.*}} : vector<3xi32> + %0 = spv.SubgroupBlockReadINTEL "StorageBuffer" %ptr : vector<3xi32> + spv.ReturnValue %0: vector<3xi32> + } + // CHECK-LABEL: @subgroup_block_write_intel + spv.func @subgroup_block_write_intel(%ptr : !spv.ptr, %value: i32) -> () "None" { + // CHECK: spv.SubgroupBlockWriteINTEL %{{.*}}, %{{.*}} : i32 + spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : i32 + spv.Return + } + // CHECK-LABEL: 
@subgroup_block_write_intel_vector + spv.func @subgroup_block_write_intel_vector(%ptr : !spv.ptr, %value: vector<3xi32>) -> () "None" { + // CHECK: spv.SubgroupBlockWriteINTEL %{{.*}}, %{{.*}} : vector<3xi32> + spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : vector<3xi32> + spv.Return + } } diff --git a/mlir/test/Dialect/SPIRV/group-ops.mlir b/mlir/test/Dialect/SPIRV/group-ops.mlir index 93e9054050ecc..55a07270a348f 100644 --- a/mlir/test/Dialect/SPIRV/group-ops.mlir +++ b/mlir/test/Dialect/SPIRV/group-ops.mlir @@ -61,3 +61,43 @@ func @group_broadcast_negative_locid_vec4(%value: f32, %localid: vector<4xi32> ) %0 = spv.GroupBroadcast "Subgroup" %value, %localid : f32, vector<4xi32> return %0: f32 } + +// ----- + +//===----------------------------------------------------------------------===// +// spv.SubgroupBlockReadINTEL +//===----------------------------------------------------------------------===// + +func @subgroup_block_read_intel(%ptr : !spv.ptr) -> i32 { + // CHECK: spv.SubgroupBlockReadINTEL %{{.*}} : i32 + %0 = spv.SubgroupBlockReadINTEL "StorageBuffer" %ptr : i32 + return %0: i32 +} + +// ----- + +func @subgroup_block_read_intel_vector(%ptr : !spv.ptr) -> vector<3xi32> { + // CHECK: spv.SubgroupBlockReadINTEL %{{.*}} : vector<3xi32> + %0 = spv.SubgroupBlockReadINTEL "StorageBuffer" %ptr : vector<3xi32> + return %0: vector<3xi32> +} + +// ----- + +//===----------------------------------------------------------------------===// +// spv.SubgroupBlockWriteINTEL +//===----------------------------------------------------------------------===// + +func @subgroup_block_write_intel(%ptr : !spv.ptr, %value: i32) -> () { + // CHECK: spv.SubgroupBlockWriteINTEL %{{.*}}, %{{.*}} : i32 + spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : i32 + return +} + +// ----- + +func @subgroup_block_write_intel_vector(%ptr : !spv.ptr, %value: vector<3xi32>) -> () { + // CHECK: spv.SubgroupBlockWriteINTEL %{{.*}}, %{{.*}} : vector<3xi32> + 
spv.SubgroupBlockWriteINTEL "StorageBuffer" %ptr, %value : vector<3xi32> + return +} \ No newline at end of file From ca134374b74e2c2c6e96695502f3a0974c1da4c1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 Sep 2020 19:03:28 -0700 Subject: [PATCH 069/465] [X86] Add a /tune: option for clang-cl We recently added support for -mtune. This patch adds /tune: so we can specify the tune CPU from clang-cl. MSVC doesn't support this but icc does. Differential Revision: https://reviews.llvm.org/D86820 --- clang/include/clang/Driver/Options.td | 3 +++ clang/test/Driver/cl-x86-flags.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5a6a196191e7f..912192660c14f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4696,6 +4696,9 @@ def _SLASH_openmp : CLFlag<"openmp">, HelpText<"Enable OpenMP support">, def _SLASH_openmp_experimental : CLFlag<"openmp:experimental">, HelpText<"Enable OpenMP support with experimental SIMD support">, Alias; +def _SLASH_tune : CLCompileJoined<"tune:">, + HelpText<"Set CPU for optimization without affecting instruction set">, + Alias; // Non-aliases: diff --git a/clang/test/Driver/cl-x86-flags.c b/clang/test/Driver/cl-x86-flags.c index 595959b48828a..15e3e012f2ac1 100644 --- a/clang/test/Driver/cl-x86-flags.c +++ b/clang/test/Driver/cl-x86-flags.c @@ -128,5 +128,9 @@ // RUN: %clang_cl -m64 -arch:avx512 --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx51264 %s // avx51264: argument unused during compilation +// RUN: %clang_cl -m64 -arch:AVX -tune:haswell --target=x86_64-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=tune %s +// tune: "-target-cpu" "sandybridge" +// tune-SAME: "-tune-cpu" "haswell" + void f() { } From b16e8687ab6c977ddab3409939e867828f394311 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 Sep 2020 20:44:12 -0700 Subject: [PATCH 070/465] 
[CodeGenPrepare][X86] Teach optimizeGatherScatterInst to turn a splat pointer into GEP with scalar base and 0 index This helps SelectionDAGBuilder recognize the splat can be used as a uniform base. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D86371 --- llvm/include/llvm/Analysis/VectorUtils.h | 2 +- llvm/lib/Analysis/VectorUtils.cpp | 2 +- llvm/lib/CodeGen/CodeGenPrepare.cpp | 154 ++++++++++-------- .../test/CodeGen/X86/masked_gather_scatter.ll | 44 ++--- .../CodeGenPrepare/X86/gather-scatter-opt.ll | 12 +- 5 files changed, 118 insertions(+), 96 deletions(-) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 074960e7ced20..8498335bf78e6 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -358,7 +358,7 @@ int getSplatIndex(ArrayRef Mask); /// Get splat value if the input is a splat vector or return nullptr. /// The value may be extracted from a splat constants vector or from /// a sequence of instructions that broadcast a single value into a vector. -const Value *getSplatValue(const Value *V); +Value *getSplatValue(const Value *V); /// Return true if each element of the vector value \p V is poisoned or equal to /// every other non-poisoned element. If an index element is specified, either diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 0bc8b7281d91e..e241300dd2e7c 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -342,7 +342,7 @@ int llvm::getSplatIndex(ArrayRef Mask) { /// This function is not fully general. It checks only 2 cases: /// the input value is (1) a splat constant vector or (2) a sequence /// of instructions that broadcasts a scalar at element 0. 
-const llvm::Value *llvm::getSplatValue(const Value *V) { +Value *llvm::getSplatValue(const Value *V) { if (isa(V->getType())) if (auto *C = dyn_cast(V)) return C->getSplatValue(); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 3272f36a14360..9a4ed2fab608b 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -5314,88 +5314,112 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, /// zero index. bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr) { - const GetElementPtrInst *GEP = dyn_cast(Ptr); - if (!GEP || !GEP->hasIndices()) + // FIXME: Support scalable vectors. + if (isa(Ptr->getType())) return false; - // If the GEP and the gather/scatter aren't in the same BB, don't optimize. - // FIXME: We should support this by sinking the GEP. - if (MemoryInst->getParent() != GEP->getParent()) - return false; - - SmallVector Ops(GEP->op_begin(), GEP->op_end()); + Value *NewAddr; - bool RewriteGEP = false; + if (const auto *GEP = dyn_cast(Ptr)) { + // Don't optimize GEPs that don't have indices. + if (!GEP->hasIndices()) + return false; - if (Ops[0]->getType()->isVectorTy()) { - Ops[0] = const_cast(getSplatValue(Ops[0])); - if (!Ops[0]) + // If the GEP and the gather/scatter aren't in the same BB, don't optimize. + // FIXME: We should support this by sinking the GEP. + if (MemoryInst->getParent() != GEP->getParent()) return false; - RewriteGEP = true; - } - unsigned FinalIndex = Ops.size() - 1; + SmallVector Ops(GEP->op_begin(), GEP->op_end()); - // Ensure all but the last index is 0. - // FIXME: This isn't strictly required. All that's required is that they are - // all scalars or splats. 
- for (unsigned i = 1; i < FinalIndex; ++i) { - auto *C = dyn_cast(Ops[i]); - if (!C) - return false; - if (isa(C->getType())) - C = C->getSplatValue(); - auto *CI = dyn_cast_or_null(C); - if (!CI || !CI->isZero()) - return false; - // Scalarize the index if needed. - Ops[i] = CI; - } - - // Try to scalarize the final index. - if (Ops[FinalIndex]->getType()->isVectorTy()) { - if (Value *V = const_cast(getSplatValue(Ops[FinalIndex]))) { - auto *C = dyn_cast(V); - // Don't scalarize all zeros vector. - if (!C || !C->isZero()) { - Ops[FinalIndex] = V; - RewriteGEP = true; - } + bool RewriteGEP = false; + + if (Ops[0]->getType()->isVectorTy()) { + Ops[0] = getSplatValue(Ops[0]); + if (!Ops[0]) + return false; + RewriteGEP = true; } - } - // If we made any changes or the we have extra operands, we need to generate - // new instructions. - if (!RewriteGEP && Ops.size() == 2) - return false; + unsigned FinalIndex = Ops.size() - 1; - unsigned NumElts = cast(Ptr->getType())->getNumElements(); + // Ensure all but the last index is 0. + // FIXME: This isn't strictly required. All that's required is that they are + // all scalars or splats. + for (unsigned i = 1; i < FinalIndex; ++i) { + auto *C = dyn_cast(Ops[i]); + if (!C) + return false; + if (isa(C->getType())) + C = C->getSplatValue(); + auto *CI = dyn_cast_or_null(C); + if (!CI || !CI->isZero()) + return false; + // Scalarize the index if needed. + Ops[i] = CI; + } + + // Try to scalarize the final index. + if (Ops[FinalIndex]->getType()->isVectorTy()) { + if (Value *V = getSplatValue(Ops[FinalIndex])) { + auto *C = dyn_cast(V); + // Don't scalarize all zeros vector. + if (!C || !C->isZero()) { + Ops[FinalIndex] = V; + RewriteGEP = true; + } + } + } - IRBuilder<> Builder(MemoryInst); + // If we made any changes or the we have extra operands, we need to generate + // new instructions. 
+ if (!RewriteGEP && Ops.size() == 2) + return false; - Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); + unsigned NumElts = cast(Ptr->getType())->getNumElements(); - Value *NewAddr; + IRBuilder<> Builder(MemoryInst); - // If the final index isn't a vector, emit a scalar GEP containing all ops - // and a vector GEP with all zeroes final index. - if (!Ops[FinalIndex]->getType()->isVectorTy()) { - NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front()); - auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts); - NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy)); - } else { - Value *Base = Ops[0]; - Value *Index = Ops[FinalIndex]; + Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); - // Create a scalar GEP if there are more than 2 operands. - if (Ops.size() != 2) { - // Replace the last index with 0. - Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); - Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front()); + // If the final index isn't a vector, emit a scalar GEP containing all ops + // and a vector GEP with all zeroes final index. + if (!Ops[FinalIndex]->getType()->isVectorTy()) { + NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front()); + auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts); + NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy)); + } else { + Value *Base = Ops[0]; + Value *Index = Ops[FinalIndex]; + + // Create a scalar GEP if there are more than 2 operands. + if (Ops.size() != 2) { + // Replace the last index with 0. + Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); + Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front()); + } + + // Now create the GEP with scalar pointer and vector index. + NewAddr = Builder.CreateGEP(Base, Index); } + } else if (!isa(Ptr)) { + // Not a GEP, maybe its a splat and we can create a GEP to enable + // SelectionDAGBuilder to use it as a uniform base. 
+ Value *V = getSplatValue(Ptr); + if (!V) + return false; + + unsigned NumElts = cast(Ptr->getType())->getNumElements(); + + IRBuilder<> Builder(MemoryInst); - // Now create the GEP with scalar pointer and vector index. - NewAddr = Builder.CreateGEP(Base, Index); + // Emit a vector GEP with a scalar pointer and all 0s vector index. + Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType()); + auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts); + NewAddr = Builder.CreateGEP(V, Constant::getNullValue(IndexTy)); + } else { + // Constant, SelectionDAGBuilder knows to check if its a splat. + return false; } MemoryInst->replaceUsesOfWith(Ptr, NewAddr); diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index c5781e8340753..88418fd85fe52 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -3323,14 +3323,13 @@ define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i3 define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthru) { ; KNL_64-LABEL: splat_ptr_gather: ; KNL_64: # %bb.0: -; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 +; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; KNL_64-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL_64-NEXT: kshiftlw $12, %k0, %k0 ; KNL_64-NEXT: kshiftrw $12, %k0, %k1 -; KNL_64-NEXT: vmovq %rdi, %xmm0 -; KNL_64-NEXT: vpbroadcastq %xmm0, %ymm0 -; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm1 {%k1} +; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1} ; KNL_64-NEXT: vmovdqa %xmm1, %xmm0 ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -3342,8 +3341,9 @@ define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthr ; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL_32-NEXT: kshiftlw $12, %k0, %k0 ; KNL_32-NEXT: kshiftrw $12, %k0, %k1 -; KNL_32-NEXT: 
vpbroadcastd {{[0-9]+}}(%esp), %xmm0 -; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1} +; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1} ; KNL_32-NEXT: vmovdqa %xmm1, %xmm0 ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -3352,18 +3352,18 @@ define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthr ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: vpmovd2m %xmm0, %k1 -; SKX-NEXT: vpbroadcastq %rdi, %ymm0 -; SKX-NEXT: vpgatherqd (,%ymm0), %xmm1 {%k1} +; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1} ; SKX-NEXT: vmovdqa %xmm1, %xmm0 -; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; SKX_32-LABEL: splat_ptr_gather: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX_32-NEXT: vpmovd2m %xmm0, %k1 -; SKX_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm0 -; SKX_32-NEXT: vpgatherdd (,%xmm0), %xmm1 {%k1} +; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; SKX_32-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1} ; SKX_32-NEXT: vmovdqa %xmm1, %xmm0 ; SKX_32-NEXT: retl %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0 @@ -3376,14 +3376,13 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) { ; KNL_64-LABEL: splat_ptr_scatter: ; KNL_64: # %bb.0: -; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 +; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; KNL_64-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL_64-NEXT: kshiftlw $12, %k0, %k0 ; KNL_64-NEXT: kshiftrw $12, %k0, %k1 -; KNL_64-NEXT: vmovq %rdi, %xmm0 -; KNL_64-NEXT: vpbroadcastq %xmm0, %ymm0 -; KNL_64-NEXT: vpscatterqd %ymm1, (,%zmm0) {%k1} +; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1} ; KNL_64-NEXT: vzeroupper ; 
KNL_64-NEXT: retq ; @@ -3394,8 +3393,9 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) { ; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL_32-NEXT: kshiftlw $12, %k0, %k0 ; KNL_32-NEXT: kshiftrw $12, %k0, %k1 -; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm0 -; KNL_32-NEXT: vpscatterdd %zmm1, (,%zmm0) {%k1} +; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl ; @@ -3403,17 +3403,17 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) { ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: vpmovd2m %xmm0, %k1 -; SKX-NEXT: vpbroadcastq %rdi, %ymm0 -; SKX-NEXT: vpscatterqd %xmm1, (,%ymm0) {%k1} -; SKX-NEXT: vzeroupper +; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1} ; SKX-NEXT: retq ; ; SKX_32-LABEL: splat_ptr_scatter: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX_32-NEXT: vpmovd2m %xmm0, %k1 -; SKX_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm0 -; SKX_32-NEXT: vpscatterdd %xmm1, (,%xmm0) {%k1} +; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; SKX_32-NEXT: vpscatterdd %xmm1, (%eax,%xmm0,4) {%k1} ; SKX_32-NEXT: retl %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0 %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll index c1674ad4ca45d..adb1930ca7829 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll @@ -87,10 +87,9 @@ define <4 x i32> @global_struct_splat() { define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthru) { ; CHECK-LABEL: @splat_ptr_gather( -; CHECK-NEXT: [[TMP1:%.*]] = 
insertelement <4 x i32*> undef, i32* [[PTR:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]]) -; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]]) +; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0 %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer @@ -100,9 +99,8 @@ define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthr define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) { ; CHECK-LABEL: @splat_ptr_scatter( -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32*> undef, i32* [[PTR:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL:%.*]], <4 x i32*> [[TMP2]], i32 4, <4 x i1> [[MASK:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL:%.*]], <4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]]) ; CHECK-NEXT: ret void ; %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0 From 426fa35b655ffb8647d9d69580a69627c0d19024 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 2 Sep 2020 20:48:50 -0700 Subject: [PATCH 071/465] [lldb] Always record both the working and home directory. Treat the home directory like the current working directory and always capture both in the VFS. 
--- lldb/source/Initialization/SystemInitializerCommon.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lldb/source/Initialization/SystemInitializerCommon.cpp b/lldb/source/Initialization/SystemInitializerCommon.cpp index d352173e11588..b29138c4884f6 100644 --- a/lldb/source/Initialization/SystemInitializerCommon.cpp +++ b/lldb/source/Initialization/SystemInitializerCommon.cpp @@ -79,9 +79,10 @@ static llvm::Error InitializeFileSystem() { repro::FileProvider &fp = g->GetOrCreate(); FileSystem::Initialize(fp.GetFileCollector()); - repro::WorkingDirectoryProvider &wp = - g->GetOrCreate(); - fp.RecordInterestingDirectory(wp.GetDirectory()); + fp.RecordInterestingDirectory( + g->GetOrCreate().GetDirectory()); + fp.RecordInterestingDirectory( + g->GetOrCreate().GetDirectory()); return llvm::Error::success(); } From 883399c8402188520870f99e7d8b3244f000e698 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 2 Sep 2020 14:28:27 -0700 Subject: [PATCH 072/465] [NewPM][Lint] Port -lint to NewPM This also changes -lint from an analysis to a pass. It's similar to -verify, and that is a normal pass, and lives in llvm/IR. 
Reviewed By: ychen Differential Revision: https://reviews.llvm.org/D87057 --- llvm/include/llvm/{Analysis => IR}/Lint.h | 29 +- llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/LinkAllPasses.h | 2 +- llvm/lib/Analysis/Analysis.cpp | 1 - llvm/lib/Analysis/CMakeLists.txt | 1 - llvm/lib/IR/CMakeLists.txt | 1 + llvm/lib/IR/Core.cpp | 2 + llvm/lib/{Analysis => IR}/Lint.cpp | 306 ++++++++++-------- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/test/Other/lint.ll | 1 + .../gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 - llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn | 1 + 13 files changed, 190 insertions(+), 159 deletions(-) rename llvm/include/llvm/{Analysis => IR}/Lint.h (70%) rename llvm/lib/{Analysis => IR}/Lint.cpp (81%) diff --git a/llvm/include/llvm/Analysis/Lint.h b/llvm/include/llvm/IR/Lint.h similarity index 70% rename from llvm/include/llvm/Analysis/Lint.h rename to llvm/include/llvm/IR/Lint.h index 0fea81e215c91..cd3f69c64df43 100644 --- a/llvm/include/llvm/Analysis/Lint.h +++ b/llvm/include/llvm/IR/Lint.h @@ -1,4 +1,4 @@ -//===-- llvm/Analysis/Lint.h - LLVM IR Lint ---------------------*- C++ -*-===// +//===-- llvm/IR/Lint.h - LLVM IR Lint ---------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -16,8 +16,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_LINT_H -#define LLVM_ANALYSIS_LINT_H +#ifndef LLVM_IR_LINT_H +#define LLVM_IR_LINT_H + +#include "llvm/IR/PassManager.h" namespace llvm { @@ -30,19 +32,20 @@ class Function; /// Check a module or function. FunctionPass *createLintPass(); -/// Check a module. +/// Lint a module. /// /// This should only be used for debugging, because it plays games with /// PassManagers and stuff. 
-void lintModule( - const Module &M ///< The module to be checked -); +void lintModule(const Module &M); + +// Lint a function. +void lintFunction(const Function &F); -// lintFunction - Check a function. -void lintFunction( - const Function &F ///< The function to be checked -); +class LintPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; -} // End llvm namespace +} // namespace llvm -#endif +#endif // LLVM_IR_LINT_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 7e512ba56c728..63ae19d8495db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -218,7 +218,7 @@ void initializeLegalizerPass(PassRegistry&); void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &); void initializeGISelKnownBitsAnalysisPass(PassRegistry &); void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&); -void initializeLintPass(PassRegistry&); +void initializeLintLegacyPassPass(PassRegistry &); void initializeLiveDebugValuesPass(PassRegistry&); void initializeLiveDebugVariablesPass(PassRegistry&); void initializeLiveIntervalsPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index dfd0e9c8da705..c4bbbd513b2c1 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -24,7 +24,6 @@ #include "llvm/Analysis/DomPrinter.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IntervalPartition.h" -#include "llvm/Analysis/Lint.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/RegionPass.h" @@ -37,6 +36,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Lint.h" #include "llvm/Support/Valgrind.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" diff --git 
a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index a9ece42df8563..e0a4b9dd10a9f 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -57,7 +57,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeLazyValueInfoWrapperPassPass(Registry); initializeLazyValueInfoPrinterPass(Registry); initializeLegacyDivergenceAnalysisPass(Registry); - initializeLintPass(Registry); initializeLoopInfoWrapperPassPass(Registry); initializeMemDepPrinterPass(Registry); initializeMemDerefPrinterPass(Registry); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index f50439bc87627..06cc2ac900b87 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -69,7 +69,6 @@ add_llvm_component_library(LLVMAnalysis LazyCallGraph.cpp LazyValueInfo.cpp LegacyDivergenceAnalysis.cpp - Lint.cpp Loads.cpp LoopAccessAnalysis.cpp LoopAnalysisManager.cpp diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt index 49805d5b8c274..3dd49a60870e3 100644 --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -32,6 +32,7 @@ add_llvm_component_library(LLVMCore LLVMContextImpl.cpp LLVMRemarkStreamer.cpp LegacyPassManager.cpp + Lint.cpp MDBuilder.cpp Mangler.cpp Metadata.cpp diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 8598acc82804f..9403f9159cf31 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Lint.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" @@ -46,6 +47,7 @@ using namespace llvm; void llvm::initializeCore(PassRegistry &Registry) { initializeDominatorTreeWrapperPassPass(Registry); + initializeLintLegacyPassPass(Registry); initializePrintModulePassWrapperPass(Registry); initializePrintFunctionPassWrapperPass(Registry); 
initializeSafepointIRVerifierPass(Registry); diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/IR/Lint.cpp similarity index 81% rename from llvm/lib/Analysis/Lint.cpp rename to llvm/lib/IR/Lint.cpp index 4a159d6035f0d..4db508a01f0ec 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/IR/Lint.cpp @@ -33,7 +33,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Lint.h" +#include "llvm/IR/Lint.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" @@ -63,6 +63,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" @@ -80,134 +81,102 @@ using namespace llvm; namespace { - namespace MemRef { - static const unsigned Read = 1; - static const unsigned Write = 2; - static const unsigned Callee = 4; - static const unsigned Branchee = 8; - } // end namespace MemRef - - class Lint : public FunctionPass, public InstVisitor { - friend class InstVisitor; - - void visitFunction(Function &F); - - void visitCallBase(CallBase &CB); - void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, - MaybeAlign Alignment, Type *Ty, unsigned Flags); - void visitEHBeginCatch(IntrinsicInst *II); - void visitEHEndCatch(IntrinsicInst *II); - - void visitReturnInst(ReturnInst &I); - void visitLoadInst(LoadInst &I); - void visitStoreInst(StoreInst &I); - void visitXor(BinaryOperator &I); - void visitSub(BinaryOperator &I); - void visitLShr(BinaryOperator &I); - void visitAShr(BinaryOperator &I); - void visitShl(BinaryOperator &I); - void visitSDiv(BinaryOperator &I); - void visitUDiv(BinaryOperator &I); - void visitSRem(BinaryOperator &I); - void visitURem(BinaryOperator &I); - void visitAllocaInst(AllocaInst &I); - void visitVAArgInst(VAArgInst &I); - void visitIndirectBrInst(IndirectBrInst &I); - void 
visitExtractElementInst(ExtractElementInst &I); - void visitInsertElementInst(InsertElementInst &I); - void visitUnreachableInst(UnreachableInst &I); - - Value *findValue(Value *V, bool OffsetOk) const; - Value *findValueImpl(Value *V, bool OffsetOk, - SmallPtrSetImpl &Visited) const; - - public: - Module *Mod; - const DataLayout *DL; - AliasAnalysis *AA; - AssumptionCache *AC; - DominatorTree *DT; - TargetLibraryInfo *TLI; - - std::string Messages; - raw_string_ostream MessagesStr; - - static char ID; // Pass identification, replacement for typeid - Lint() : FunctionPass(ID), MessagesStr(Messages) { - initializeLintPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - void print(raw_ostream &O, const Module *M) const override {} - - void WriteValues(ArrayRef Vs) { - for (const Value *V : Vs) { - if (!V) - continue; - if (isa(V)) { - MessagesStr << *V << '\n'; - } else { - V->printAsOperand(MessagesStr, true, Mod); - MessagesStr << '\n'; - } +namespace MemRef { +static const unsigned Read = 1; +static const unsigned Write = 2; +static const unsigned Callee = 4; +static const unsigned Branchee = 8; +} // end namespace MemRef + +class Lint : public InstVisitor { + friend class InstVisitor; + + void visitFunction(Function &F); + + void visitCallBase(CallBase &CB); + void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, + MaybeAlign Alignment, Type *Ty, unsigned Flags); + void visitEHBeginCatch(IntrinsicInst *II); + void visitEHEndCatch(IntrinsicInst *II); + + void visitReturnInst(ReturnInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitXor(BinaryOperator &I); + void visitSub(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + void visitShl(BinaryOperator &I); + 
void visitSDiv(BinaryOperator &I); + void visitUDiv(BinaryOperator &I); + void visitSRem(BinaryOperator &I); + void visitURem(BinaryOperator &I); + void visitAllocaInst(AllocaInst &I); + void visitVAArgInst(VAArgInst &I); + void visitIndirectBrInst(IndirectBrInst &I); + void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitUnreachableInst(UnreachableInst &I); + + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, + SmallPtrSetImpl &Visited) const; + +public: + Module *Mod; + const DataLayout *DL; + AliasAnalysis *AA; + AssumptionCache *AC; + DominatorTree *DT; + TargetLibraryInfo *TLI; + + std::string Messages; + raw_string_ostream MessagesStr; + + Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, + AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) + : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI), + MessagesStr(Messages) {} + + void WriteValues(ArrayRef Vs) { + for (const Value *V : Vs) { + if (!V) + continue; + if (isa(V)) { + MessagesStr << *V << '\n'; + } else { + V->printAsOperand(MessagesStr, true, Mod); + MessagesStr << '\n'; } } + } - /// A check failed, so printout out the condition and the message. - /// - /// This provides a nice place to put a breakpoint if you want to see why - /// something is not correct. - void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } - - /// A check failed (with values to print). - /// - /// This calls the Message-only version so that the above is easier to set - /// a breakpoint on. - template - void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) { - CheckFailed(Message); - WriteValues({V1, Vs...}); - } - }; + /// A check failed, so printout out the condition and the message. + /// + /// This provides a nice place to put a breakpoint if you want to see why + /// something is not correct. 
+ void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } + + /// A check failed (with values to print). + /// + /// This calls the Message-only version so that the above is easier to set + /// a breakpoint on. + template + void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) { + CheckFailed(Message); + WriteValues({V1, Vs...}); + } +}; } // end anonymous namespace -char Lint::ID = 0; -INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", - false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", - false, true) - // Assert - We know that cond should be true, if not print an error message. -#define Assert(C, ...) \ - do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false) - -// Lint::run - This is the main Analysis entry point for a -// function. -// -bool Lint::runOnFunction(Function &F) { - Mod = F.getParent(); - DL = &F.getParent()->getDataLayout(); - AA = &getAnalysis().getAAResults(); - AC = &getAnalysis().getAssumptionCache(F); - DT = &getAnalysis().getDomTree(); - TLI = &getAnalysis().getTLI(F); - visit(F); - dbgs() << MessagesStr.str(); - Messages.clear(); - return false; -} +#define Assert(C, ...) \ + do { \ + if (!(C)) { \ + CheckFailed(__VA_ARGS__); \ + return; \ + } \ + } while (false) void Lint::visitFunction(Function &F) { // This isn't undefined behavior, it's just a little unusual, and it's a @@ -281,8 +250,7 @@ void Lint::visitCallBase(CallBase &I) { // Check that an sret argument points to valid memory. 
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { - Type *Ty = - cast(Formal->getType())->getElementType(); + Type *Ty = cast(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), DL->getABITypeAlign(Ty), Ty, MemRef::Read | MemRef::Write); @@ -309,12 +277,12 @@ void Lint::visitCallBase(CallBase &I) { } } - if (IntrinsicInst *II = dyn_cast(&I)) switch (II->getIntrinsicID()) { - default: break; + default: + break; - // TODO: Check more intrinsics + // TODO: Check more intrinsics case Intrinsic::memcpy: { MemCpyInst *MCI = cast(&I); @@ -553,7 +521,8 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, VectorType *VecTy = dyn_cast(V->getType()); if (!VecTy) { - KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast(V), DT); + KnownBits Known = + computeKnownBits(V, DL, 0, AC, dyn_cast(V), DT); return Known.isZero(); } @@ -682,11 +651,13 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (!VisitedBlocks.insert(BB).second) break; if (Value *U = - FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) + FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) return findValueImpl(U, OffsetOk, Visited); - if (BBI != BB->begin()) break; + if (BBI != BB->begin()) + break; BB = BB->getUniquePredecessor(); - if (!BB) break; + if (!BB) + break; BBI = BB->end(); } } else if (PHINode *PN = dyn_cast(V)) { @@ -696,8 +667,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (CI->isNoopCast(*DL)) return findValueImpl(CI->getOperand(0), OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast(V)) { - if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), - Ex->getIndices())) + if (Value *W = + FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) if (W != V) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast(V)) { @@ -728,22 +699,75 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, return V; } +PreservedAnalyses 
LintPass::run(Function &F, FunctionAnalysisManager &AM) { + auto *Mod = F.getParent(); + auto *DL = &F.getParent()->getDataLayout(); + auto *AA = &AM.getResult(F); + auto *AC = &AM.getResult(F); + auto *DT = &AM.getResult(F); + auto *TLI = &AM.getResult(F); + Lint L(Mod, DL, AA, AC, DT, TLI); + L.visit(F); + dbgs() << L.MessagesStr.str(); + return PreservedAnalyses::all(); +} + +class LintLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + LintLegacyPass() : FunctionPass(ID) { + initializeLintLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + void print(raw_ostream &O, const Module *M) const override {} +}; + +char LintLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", + false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", + false, true) + +bool LintLegacyPass::runOnFunction(Function &F) { + auto *Mod = F.getParent(); + auto *DL = &F.getParent()->getDataLayout(); + auto *AA = &getAnalysis().getAAResults(); + auto *AC = &getAnalysis().getAssumptionCache(F); + auto *DT = &getAnalysis().getDomTree(); + auto *TLI = &getAnalysis().getTLI(F); + Lint L(Mod, DL, AA, AC, DT, TLI); + L.visit(F); + dbgs() << L.MessagesStr.str(); + return false; +} + //===----------------------------------------------------------------------===// // Implement the public interfaces to this file... 
//===----------------------------------------------------------------------===// -FunctionPass *llvm::createLintPass() { - return new Lint(); -} +FunctionPass *llvm::createLintPass() { return new LintLegacyPass(); } /// lintFunction - Check a function for errors, printing messages on stderr. /// void llvm::lintFunction(const Function &f) { - Function &F = const_cast(f); + Function &F = const_cast(f); assert(!F.isDeclaration() && "Cannot lint external functions"); legacy::FunctionPassManager FPM(F.getParent()); - Lint *V = new Lint(); + auto *V = new LintLegacyPass(); FPM.add(V); FPM.run(F); } @@ -752,7 +776,7 @@ void llvm::lintFunction(const Function &f) { /// void llvm::lintModule(const Module &M) { legacy::PassManager PM; - Lint *V = new Lint(); + auto *V = new LintLegacyPass(); PM.add(V); - PM.run(const_cast(M)); + PM.run(const_cast(M)); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 3a6b736dae3cf..59632765829ff 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -61,6 +61,7 @@ #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Lint.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/SafepointIRVerifier.h" #include "llvm/IR/Verifier.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 12e04ad91128d..b0d1d2a63a830 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -205,6 +205,7 @@ FUNCTION_PASS("irce", IRCEPass()) FUNCTION_PASS("float2int", Float2IntPass()) FUNCTION_PASS("no-op-function", NoOpFunctionPass()) FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass()) +FUNCTION_PASS("lint", LintPass()) FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings()) FUNCTION_PASS("loweratomic", LowerAtomicPass()) FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) diff --git a/llvm/test/Other/lint.ll b/llvm/test/Other/lint.ll 
index 45c8bd55fa014..a156301c1c26b 100644 --- a/llvm/test/Other/lint.ll +++ b/llvm/test/Other/lint.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -lint -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes=lint -disable-output < %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64" declare fastcc void @bar() diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 1c6d22dd672af..3ec2f15abd48b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -67,7 +67,6 @@ static_library("Analysis") { "LazyCallGraph.cpp", "LazyValueInfo.cpp", "LegacyDivergenceAnalysis.cpp", - "Lint.cpp", "Loads.cpp", "LoopAccessAnalysis.cpp", "LoopAnalysisManager.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn index aa2631a9d229a..c934480512573 100644 --- a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn @@ -47,6 +47,7 @@ static_library("IR") { "LLVMContextImpl.cpp", "LLVMRemarkStreamer.cpp", "LegacyPassManager.cpp", + "Lint.cpp", "MDBuilder.cpp", "Mangler.cpp", "Metadata.cpp", From e440b4933ada0843c6a3c3d3076adafc69415a00 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 2 Sep 2020 21:34:20 -0700 Subject: [PATCH 073/465] Revert "[NewPM][Lint] Port -lint to NewPM" This reverts commit 883399c8402188520870f99e7d8b3244f000e698. 
--- llvm/include/llvm/{IR => Analysis}/Lint.h | 29 +- llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/LinkAllPasses.h | 2 +- llvm/lib/Analysis/Analysis.cpp | 1 + llvm/lib/Analysis/CMakeLists.txt | 1 + llvm/lib/{IR => Analysis}/Lint.cpp | 306 ++++++++---------- llvm/lib/IR/CMakeLists.txt | 1 - llvm/lib/IR/Core.cpp | 2 - llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/lib/Passes/PassRegistry.def | 1 - llvm/test/Other/lint.ll | 1 - .../gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn | 1 - 13 files changed, 159 insertions(+), 190 deletions(-) rename llvm/include/llvm/{IR => Analysis}/Lint.h (70%) rename llvm/lib/{IR => Analysis}/Lint.cpp (81%) diff --git a/llvm/include/llvm/IR/Lint.h b/llvm/include/llvm/Analysis/Lint.h similarity index 70% rename from llvm/include/llvm/IR/Lint.h rename to llvm/include/llvm/Analysis/Lint.h index cd3f69c64df43..0fea81e215c91 100644 --- a/llvm/include/llvm/IR/Lint.h +++ b/llvm/include/llvm/Analysis/Lint.h @@ -1,4 +1,4 @@ -//===-- llvm/IR/Lint.h - LLVM IR Lint ---------------------------*- C++ -*-===// +//===-- llvm/Analysis/Lint.h - LLVM IR Lint ---------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -16,10 +16,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_IR_LINT_H -#define LLVM_IR_LINT_H - -#include "llvm/IR/PassManager.h" +#ifndef LLVM_ANALYSIS_LINT_H +#define LLVM_ANALYSIS_LINT_H namespace llvm { @@ -32,20 +30,19 @@ class Function; /// Check a module or function. FunctionPass *createLintPass(); -/// Lint a module. +/// Check a module. /// /// This should only be used for debugging, because it plays games with /// PassManagers and stuff. -void lintModule(const Module &M); - -// Lint a function. 
-void lintFunction(const Function &F); +void lintModule( + const Module &M ///< The module to be checked +); -class LintPass : public PassInfoMixin { -public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); -}; +// lintFunction - Check a function. +void lintFunction( + const Function &F ///< The function to be checked +); -} // namespace llvm +} // End llvm namespace -#endif // LLVM_IR_LINT_H +#endif diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 63ae19d8495db..7e512ba56c728 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -218,7 +218,7 @@ void initializeLegalizerPass(PassRegistry&); void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &); void initializeGISelKnownBitsAnalysisPass(PassRegistry &); void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&); -void initializeLintLegacyPassPass(PassRegistry &); +void initializeLintPass(PassRegistry&); void initializeLiveDebugValuesPass(PassRegistry&); void initializeLiveDebugVariablesPass(PassRegistry&); void initializeLiveIntervalsPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index c4bbbd513b2c1..dfd0e9c8da705 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -24,6 +24,7 @@ #include "llvm/Analysis/DomPrinter.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IntervalPartition.h" +#include "llvm/Analysis/Lint.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/RegionPass.h" @@ -36,7 +37,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" -#include "llvm/IR/Lint.h" #include "llvm/Support/Valgrind.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp 
index e0a4b9dd10a9f..a9ece42df8563 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -57,6 +57,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeLazyValueInfoWrapperPassPass(Registry); initializeLazyValueInfoPrinterPass(Registry); initializeLegacyDivergenceAnalysisPass(Registry); + initializeLintPass(Registry); initializeLoopInfoWrapperPassPass(Registry); initializeMemDepPrinterPass(Registry); initializeMemDerefPrinterPass(Registry); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 06cc2ac900b87..f50439bc87627 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -69,6 +69,7 @@ add_llvm_component_library(LLVMAnalysis LazyCallGraph.cpp LazyValueInfo.cpp LegacyDivergenceAnalysis.cpp + Lint.cpp Loads.cpp LoopAccessAnalysis.cpp LoopAnalysisManager.cpp diff --git a/llvm/lib/IR/Lint.cpp b/llvm/lib/Analysis/Lint.cpp similarity index 81% rename from llvm/lib/IR/Lint.cpp rename to llvm/lib/Analysis/Lint.cpp index 4db508a01f0ec..4a159d6035f0d 100644 --- a/llvm/lib/IR/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -33,7 +33,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Lint.h" +#include "llvm/Analysis/Lint.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" @@ -63,7 +63,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" @@ -81,102 +80,134 @@ using namespace llvm; namespace { -namespace MemRef { -static const unsigned Read = 1; -static const unsigned Write = 2; -static const unsigned Callee = 4; -static const unsigned Branchee = 8; -} // end namespace MemRef - -class Lint : public InstVisitor { - friend class InstVisitor; - - void visitFunction(Function &F); - - void 
visitCallBase(CallBase &CB); - void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, - MaybeAlign Alignment, Type *Ty, unsigned Flags); - void visitEHBeginCatch(IntrinsicInst *II); - void visitEHEndCatch(IntrinsicInst *II); - - void visitReturnInst(ReturnInst &I); - void visitLoadInst(LoadInst &I); - void visitStoreInst(StoreInst &I); - void visitXor(BinaryOperator &I); - void visitSub(BinaryOperator &I); - void visitLShr(BinaryOperator &I); - void visitAShr(BinaryOperator &I); - void visitShl(BinaryOperator &I); - void visitSDiv(BinaryOperator &I); - void visitUDiv(BinaryOperator &I); - void visitSRem(BinaryOperator &I); - void visitURem(BinaryOperator &I); - void visitAllocaInst(AllocaInst &I); - void visitVAArgInst(VAArgInst &I); - void visitIndirectBrInst(IndirectBrInst &I); - void visitExtractElementInst(ExtractElementInst &I); - void visitInsertElementInst(InsertElementInst &I); - void visitUnreachableInst(UnreachableInst &I); - - Value *findValue(Value *V, bool OffsetOk) const; - Value *findValueImpl(Value *V, bool OffsetOk, - SmallPtrSetImpl &Visited) const; - -public: - Module *Mod; - const DataLayout *DL; - AliasAnalysis *AA; - AssumptionCache *AC; - DominatorTree *DT; - TargetLibraryInfo *TLI; - - std::string Messages; - raw_string_ostream MessagesStr; - - Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, - AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) - : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI), - MessagesStr(Messages) {} - - void WriteValues(ArrayRef Vs) { - for (const Value *V : Vs) { - if (!V) - continue; - if (isa(V)) { - MessagesStr << *V << '\n'; - } else { - V->printAsOperand(MessagesStr, true, Mod); - MessagesStr << '\n'; + namespace MemRef { + static const unsigned Read = 1; + static const unsigned Write = 2; + static const unsigned Callee = 4; + static const unsigned Branchee = 8; + } // end namespace MemRef + + class Lint : public FunctionPass, public InstVisitor { + friend class 
InstVisitor; + + void visitFunction(Function &F); + + void visitCallBase(CallBase &CB); + void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, + MaybeAlign Alignment, Type *Ty, unsigned Flags); + void visitEHBeginCatch(IntrinsicInst *II); + void visitEHEndCatch(IntrinsicInst *II); + + void visitReturnInst(ReturnInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitXor(BinaryOperator &I); + void visitSub(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + void visitShl(BinaryOperator &I); + void visitSDiv(BinaryOperator &I); + void visitUDiv(BinaryOperator &I); + void visitSRem(BinaryOperator &I); + void visitURem(BinaryOperator &I); + void visitAllocaInst(AllocaInst &I); + void visitVAArgInst(VAArgInst &I); + void visitIndirectBrInst(IndirectBrInst &I); + void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitUnreachableInst(UnreachableInst &I); + + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, + SmallPtrSetImpl &Visited) const; + + public: + Module *Mod; + const DataLayout *DL; + AliasAnalysis *AA; + AssumptionCache *AC; + DominatorTree *DT; + TargetLibraryInfo *TLI; + + std::string Messages; + raw_string_ostream MessagesStr; + + static char ID; // Pass identification, replacement for typeid + Lint() : FunctionPass(ID), MessagesStr(Messages) { + initializeLintPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + void print(raw_ostream &O, const Module *M) const override {} + + void WriteValues(ArrayRef Vs) { + for (const Value *V : Vs) { + if (!V) + continue; + if (isa(V)) { + MessagesStr << *V << '\n'; + } else { + V->printAsOperand(MessagesStr, 
true, Mod); + MessagesStr << '\n'; + } } } - } - /// A check failed, so printout out the condition and the message. - /// - /// This provides a nice place to put a breakpoint if you want to see why - /// something is not correct. - void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } - - /// A check failed (with values to print). - /// - /// This calls the Message-only version so that the above is easier to set - /// a breakpoint on. - template - void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) { - CheckFailed(Message); - WriteValues({V1, Vs...}); - } -}; + /// A check failed, so printout out the condition and the message. + /// + /// This provides a nice place to put a breakpoint if you want to see why + /// something is not correct. + void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } + + /// A check failed (with values to print). + /// + /// This calls the Message-only version so that the above is easier to set + /// a breakpoint on. + template + void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) { + CheckFailed(Message); + WriteValues({V1, Vs...}); + } + }; } // end anonymous namespace +char Lint::ID = 0; +INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", + false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", + false, true) + // Assert - We know that cond should be true, if not print an error message. -#define Assert(C, ...) \ - do { \ - if (!(C)) { \ - CheckFailed(__VA_ARGS__); \ - return; \ - } \ - } while (false) +#define Assert(C, ...) \ + do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false) + +// Lint::run - This is the main Analysis entry point for a +// function. 
+// +bool Lint::runOnFunction(Function &F) { + Mod = F.getParent(); + DL = &F.getParent()->getDataLayout(); + AA = &getAnalysis().getAAResults(); + AC = &getAnalysis().getAssumptionCache(F); + DT = &getAnalysis().getDomTree(); + TLI = &getAnalysis().getTLI(F); + visit(F); + dbgs() << MessagesStr.str(); + Messages.clear(); + return false; +} void Lint::visitFunction(Function &F) { // This isn't undefined behavior, it's just a little unusual, and it's a @@ -250,7 +281,8 @@ void Lint::visitCallBase(CallBase &I) { // Check that an sret argument points to valid memory. if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { - Type *Ty = cast(Formal->getType())->getElementType(); + Type *Ty = + cast(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), DL->getABITypeAlign(Ty), Ty, MemRef::Read | MemRef::Write); @@ -277,12 +309,12 @@ void Lint::visitCallBase(CallBase &I) { } } + if (IntrinsicInst *II = dyn_cast(&I)) switch (II->getIntrinsicID()) { - default: - break; + default: break; - // TODO: Check more intrinsics + // TODO: Check more intrinsics case Intrinsic::memcpy: { MemCpyInst *MCI = cast(&I); @@ -521,8 +553,7 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, VectorType *VecTy = dyn_cast(V->getType()); if (!VecTy) { - KnownBits Known = - computeKnownBits(V, DL, 0, AC, dyn_cast(V), DT); + KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast(V), DT); return Known.isZero(); } @@ -651,13 +682,11 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (!VisitedBlocks.insert(BB).second) break; if (Value *U = - FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) + FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) return findValueImpl(U, OffsetOk, Visited); - if (BBI != BB->begin()) - break; + if (BBI != BB->begin()) break; BB = BB->getUniquePredecessor(); - if (!BB) - break; + if (!BB) break; BBI = BB->end(); } } else if (PHINode *PN = dyn_cast(V)) { @@ -667,8 
+696,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (CI->isNoopCast(*DL)) return findValueImpl(CI->getOperand(0), OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast(V)) { - if (Value *W = - FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) + if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), + Ex->getIndices())) if (W != V) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast(V)) { @@ -699,75 +728,22 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, return V; } -PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) { - auto *Mod = F.getParent(); - auto *DL = &F.getParent()->getDataLayout(); - auto *AA = &AM.getResult(F); - auto *AC = &AM.getResult(F); - auto *DT = &AM.getResult(F); - auto *TLI = &AM.getResult(F); - Lint L(Mod, DL, AA, AC, DT, TLI); - L.visit(F); - dbgs() << L.MessagesStr.str(); - return PreservedAnalyses::all(); -} - -class LintLegacyPass : public FunctionPass { -public: - static char ID; // Pass identification, replacement for typeid - LintLegacyPass() : FunctionPass(ID) { - initializeLintLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - void print(raw_ostream &O, const Module *M) const override {} -}; - -char LintLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", - false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", - false, true) - -bool LintLegacyPass::runOnFunction(Function &F) { - auto *Mod = F.getParent(); - auto *DL = 
&F.getParent()->getDataLayout(); - auto *AA = &getAnalysis().getAAResults(); - auto *AC = &getAnalysis().getAssumptionCache(F); - auto *DT = &getAnalysis().getDomTree(); - auto *TLI = &getAnalysis().getTLI(F); - Lint L(Mod, DL, AA, AC, DT, TLI); - L.visit(F); - dbgs() << L.MessagesStr.str(); - return false; -} - //===----------------------------------------------------------------------===// // Implement the public interfaces to this file... //===----------------------------------------------------------------------===// -FunctionPass *llvm::createLintPass() { return new LintLegacyPass(); } +FunctionPass *llvm::createLintPass() { + return new Lint(); +} /// lintFunction - Check a function for errors, printing messages on stderr. /// void llvm::lintFunction(const Function &f) { - Function &F = const_cast(f); + Function &F = const_cast(f); assert(!F.isDeclaration() && "Cannot lint external functions"); legacy::FunctionPassManager FPM(F.getParent()); - auto *V = new LintLegacyPass(); + Lint *V = new Lint(); FPM.add(V); FPM.run(F); } @@ -776,7 +752,7 @@ void llvm::lintFunction(const Function &f) { /// void llvm::lintModule(const Module &M) { legacy::PassManager PM; - auto *V = new LintLegacyPass(); + Lint *V = new Lint(); PM.add(V); - PM.run(const_cast(M)); + PM.run(const_cast(M)); } diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt index 3dd49a60870e3..49805d5b8c274 100644 --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -32,7 +32,6 @@ add_llvm_component_library(LLVMCore LLVMContextImpl.cpp LLVMRemarkStreamer.cpp LegacyPassManager.cpp - Lint.cpp MDBuilder.cpp Mangler.cpp Metadata.cpp diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 9403f9159cf31..8598acc82804f 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -26,7 +26,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Lint.h" #include "llvm/IR/Module.h" #include 
"llvm/InitializePasses.h" #include "llvm/Support/Debug.h" @@ -47,7 +46,6 @@ using namespace llvm; void llvm::initializeCore(PassRegistry &Registry) { initializeDominatorTreeWrapperPassPass(Registry); - initializeLintLegacyPassPass(Registry); initializePrintModulePassWrapperPass(Registry); initializePrintFunctionPassWrapperPass(Registry); initializeSafepointIRVerifierPass(Registry); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 59632765829ff..3a6b736dae3cf 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -61,7 +61,6 @@ #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRPrintingPasses.h" -#include "llvm/IR/Lint.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/SafepointIRVerifier.h" #include "llvm/IR/Verifier.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index b0d1d2a63a830..12e04ad91128d 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -205,7 +205,6 @@ FUNCTION_PASS("irce", IRCEPass()) FUNCTION_PASS("float2int", Float2IntPass()) FUNCTION_PASS("no-op-function", NoOpFunctionPass()) FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass()) -FUNCTION_PASS("lint", LintPass()) FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings()) FUNCTION_PASS("loweratomic", LowerAtomicPass()) FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) diff --git a/llvm/test/Other/lint.ll b/llvm/test/Other/lint.ll index a156301c1c26b..45c8bd55fa014 100644 --- a/llvm/test/Other/lint.ll +++ b/llvm/test/Other/lint.ll @@ -1,5 +1,4 @@ ; RUN: opt -basic-aa -lint -disable-output < %s 2>&1 | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes=lint -disable-output < %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64" declare fastcc void @bar() diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 
3ec2f15abd48b..1c6d22dd672af 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -67,6 +67,7 @@ static_library("Analysis") { "LazyCallGraph.cpp", "LazyValueInfo.cpp", "LegacyDivergenceAnalysis.cpp", + "Lint.cpp", "Loads.cpp", "LoopAccessAnalysis.cpp", "LoopAnalysisManager.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn index c934480512573..aa2631a9d229a 100644 --- a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn @@ -47,7 +47,6 @@ static_library("IR") { "LLVMContextImpl.cpp", "LLVMRemarkStreamer.cpp", "LegacyPassManager.cpp", - "Lint.cpp", "MDBuilder.cpp", "Mangler.cpp", "Metadata.cpp", From fa95e35593353810c1b26e3641451fa82089d792 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 2 Sep 2020 21:58:06 -0700 Subject: [PATCH 074/465] [lldb] Pass -fno-objc-exceptions for objcxx targets When compiling an Objective-C++ file, __has_feature(cxx_exceptions) will return true with -fno-exceptions but without -fno-objc-exceptions. This was causing LLVM_ENABLE_EXCEPTIONS to be defined for a subset of files. 
--- lldb/source/Host/macosx/objcxx/CMakeLists.txt | 2 ++ lldb/source/Plugins/Platform/MacOSX/objcxx/CMakeLists.txt | 3 +++ 2 files changed, 5 insertions(+) diff --git a/lldb/source/Host/macosx/objcxx/CMakeLists.txt b/lldb/source/Host/macosx/objcxx/CMakeLists.txt index e55b094c0c305..9db24f3064185 100644 --- a/lldb/source/Host/macosx/objcxx/CMakeLists.txt +++ b/lldb/source/Host/macosx/objcxx/CMakeLists.txt @@ -14,3 +14,5 @@ add_lldb_library(lldbHostMacOSXObjCXX LINK_COMPONENTS Support ) + +target_compile_options(lldbHostMacOSXObjCXX PRIVATE -fno-objc-exceptions) diff --git a/lldb/source/Plugins/Platform/MacOSX/objcxx/CMakeLists.txt b/lldb/source/Plugins/Platform/MacOSX/objcxx/CMakeLists.txt index 946ff0a64c26f..7d094a5865c43 100644 --- a/lldb/source/Plugins/Platform/MacOSX/objcxx/CMakeLists.txt +++ b/lldb/source/Plugins/Platform/MacOSX/objcxx/CMakeLists.txt @@ -15,3 +15,6 @@ add_lldb_library(lldbPluginPlatformMacOSXObjCXX Object Support ) + + +target_compile_options(lldbPluginPlatformMacOSXObjCXX PRIVATE -fno-objc-exceptions) From 3746906193c1be913fe60072de9d4feb80f9c461 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 31 Aug 2020 15:13:49 -0700 Subject: [PATCH 075/465] [lldb] Add reproducer verifier Add a reproducer verifier that catches: - Missing or invalid home directory - Missing or invalid working directory - Missing or invalid module/symbol paths - Missing files from the VFS The verifier is enabled by default during replay, but can be skipped by passing --reproducer-no-verify. 
Differential revision: https://reviews.llvm.org/D86497 --- lldb/include/lldb/API/SBReproducer.h | 25 +++ lldb/include/lldb/Utility/Reproducer.h | 16 ++ lldb/source/API/SBReproducer.cpp | 63 ++++++- .../Commands/CommandObjectReproducer.cpp | 164 +++++++++++++++--- lldb/source/Commands/Options.td | 6 + lldb/source/Utility/Reproducer.cpp | 91 ++++++++++ lldb/source/Utility/ReproducerProvider.cpp | 1 + .../Shell/Reproducer/TestDebugSymbols.test | 4 + lldb/test/Shell/Reproducer/TestVerify.test | 27 +++ lldb/tools/driver/Driver.cpp | 6 +- lldb/tools/driver/Options.td | 2 + llvm/include/llvm/Support/VirtualFileSystem.h | 4 + llvm/lib/Support/VirtualFileSystem.cpp | 11 ++ 13 files changed, 388 insertions(+), 32 deletions(-) create mode 100644 lldb/test/Shell/Reproducer/TestVerify.test diff --git a/lldb/include/lldb/API/SBReproducer.h b/lldb/include/lldb/API/SBReproducer.h index 78044e9acbc31..5578162412c8b 100644 --- a/lldb/include/lldb/API/SBReproducer.h +++ b/lldb/include/lldb/API/SBReproducer.h @@ -11,8 +11,32 @@ #include "lldb/API/SBDefines.h" +namespace lldb_private { +namespace repro { +struct ReplayOptions; +} +} // namespace lldb_private + namespace lldb { +class LLDB_API SBReplayOptions { +public: + SBReplayOptions(); + SBReplayOptions(const SBReplayOptions &rhs); + ~SBReplayOptions(); + + SBReplayOptions &operator=(const SBReplayOptions &rhs); + + void SetVerify(bool verify); + bool GetVerify() const; + + void SetCheckVersion(bool check); + bool GetCheckVersion() const; + +private: + std::unique_ptr m_opaque_up; +}; + /// The SBReproducer class is special because it bootstraps the capture and /// replay of SB API calls. As a result we cannot rely on any other SB objects /// in the interface or implementation of this class. 
@@ -22,6 +46,7 @@ class LLDB_API SBReproducer { static const char *Capture(const char *path); static const char *Replay(const char *path); static const char *Replay(const char *path, bool skip_version_check); + static const char *Replay(const char *path, const SBReplayOptions &options); static const char *PassiveReplay(const char *path); static const char *GetPath(); static bool SetAutoGenerate(bool b); diff --git a/lldb/include/lldb/Utility/Reproducer.h b/lldb/include/lldb/Utility/Reproducer.h index d6cde44850901..7e5591493d71e 100644 --- a/lldb/include/lldb/Utility/Reproducer.h +++ b/lldb/include/lldb/Utility/Reproducer.h @@ -227,6 +227,22 @@ class Reproducer { mutable std::mutex m_mutex; }; +class Verifier { +public: + Verifier(Loader *loader) : m_loader(loader) {} + void Verify(llvm::function_ref error_callback, + llvm::function_ref warning_callback, + llvm::function_ref note_callback) const; + +private: + Loader *m_loader; +}; + +struct ReplayOptions { + bool verify = true; + bool check_version = true; +}; + } // namespace repro } // namespace lldb_private diff --git a/lldb/source/API/SBReproducer.cpp b/lldb/source/API/SBReproducer.cpp index 7d08a88fe9e30..233e55550b5b7 100644 --- a/lldb/source/API/SBReproducer.cpp +++ b/lldb/source/API/SBReproducer.cpp @@ -30,6 +30,33 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::repro; +SBReplayOptions::SBReplayOptions() + : m_opaque_up(std::make_unique()){}; + +SBReplayOptions::SBReplayOptions(const SBReplayOptions &rhs) + : m_opaque_up(std::make_unique(*rhs.m_opaque_up)) {} + +SBReplayOptions::~SBReplayOptions() = default; + +SBReplayOptions &SBReplayOptions::operator=(const SBReplayOptions &rhs) { + if (this == &rhs) + return *this; + *m_opaque_up = *rhs.m_opaque_up; + return *this; +} + +void SBReplayOptions::SetVerify(bool verify) { m_opaque_up->verify = verify; } + +bool SBReplayOptions::GetVerify() const { return m_opaque_up->verify; } + +void 
SBReplayOptions::SetCheckVersion(bool check) { + m_opaque_up->check_version = check; +} + +bool SBReplayOptions::GetCheckVersion() const { + return m_opaque_up->check_version; +} + SBRegistry::SBRegistry() { Registry &R = *this; @@ -163,10 +190,18 @@ const char *SBReproducer::PassiveReplay(const char *path) { } const char *SBReproducer::Replay(const char *path) { - return SBReproducer::Replay(path, false); + SBReplayOptions options; + return SBReproducer::Replay(path, options); } const char *SBReproducer::Replay(const char *path, bool skip_version_check) { + SBReplayOptions options; + options.SetCheckVersion(!skip_version_check); + return SBReproducer::Replay(path, options); +} + +const char *SBReproducer::Replay(const char *path, + const SBReplayOptions &options) { static std::string error; if (auto e = Reproducer::Initialize(ReproducerMode::Replay, FileSpec(path))) { error = llvm::toString(std::move(e)); @@ -179,7 +214,7 @@ const char *SBReproducer::Replay(const char *path, bool skip_version_check) { return error.c_str(); } - if (!skip_version_check) { + if (options.GetCheckVersion()) { llvm::Expected version = loader->LoadBuffer(); if (!version) { error = llvm::toString(version.takeError()); @@ -195,6 +230,30 @@ const char *SBReproducer::Replay(const char *path, bool skip_version_check) { } } + if (options.GetVerify()) { + bool verification_failed = false; + llvm::raw_string_ostream os(error); + auto error_callback = [&](llvm::StringRef error) { + verification_failed = true; + os << "\nerror: " << error; + }; + + auto warning_callback = [&](llvm::StringRef warning) { + verification_failed = true; + os << "\nwarning: " << warning; + }; + + auto note_callback = [&](llvm::StringRef warning) {}; + + Verifier verifier(loader); + verifier.Verify(error_callback, warning_callback, note_callback); + + if (verification_failed) { + os.flush(); + return error.c_str(); + } + } + FileSpec file = loader->GetFile(); if (!file) { error = "unable to get replay data from 
reproducer."; diff --git a/lldb/source/Commands/CommandObjectReproducer.cpp b/lldb/source/Commands/CommandObjectReproducer.cpp index da2d9ca5a901a..ae4894009054b 100644 --- a/lldb/source/Commands/CommandObjectReproducer.cpp +++ b/lldb/source/Commands/CommandObjectReproducer.cpp @@ -116,6 +116,9 @@ static constexpr OptionEnumValues ReproducerSignalType() { #define LLDB_OPTIONS_reproducer_xcrash #include "CommandOptions.inc" +#define LLDB_OPTIONS_reproducer_verify +#include "CommandOptions.inc" + template llvm::Expected static ReadFromYAML(StringRef filename) { auto error_or_file = MemoryBuffer::getFile(filename); @@ -134,6 +137,38 @@ llvm::Expected static ReadFromYAML(StringRef filename) { return t; } +static void SetError(CommandReturnObject &result, Error err) { + result.GetErrorStream().Printf("error: %s\n", + toString(std::move(err)).c_str()); + result.SetStatus(eReturnStatusFailed); +} + +/// Create a loader from the given path if specified. Otherwise use the current +/// loader used for replay. +static Loader * +GetLoaderFromPathOrCurrent(llvm::Optional &loader_storage, + CommandReturnObject &result, + FileSpec reproducer_path) { + if (reproducer_path) { + loader_storage.emplace(reproducer_path); + Loader *loader = &(*loader_storage); + if (Error err = loader->LoadIndex()) { + // This is a hard error and will set the result to eReturnStatusFailed. + SetError(result, std::move(err)); + return nullptr; + } + return loader; + } + + if (Loader *loader = Reproducer::Instance().GetLoader()) + return loader; + + // This is a soft error because this is expected to fail during capture. 
+ result.SetError("Not specifying a reproducer is only support during replay."); + result.SetStatus(eReturnStatusSuccessFinishNoResult); + return nullptr; +} + class CommandObjectReproducerGenerate : public CommandObjectParsed { public: CommandObjectReproducerGenerate(CommandInterpreter &interpreter) @@ -312,12 +347,6 @@ class CommandObjectReproducerStatus : public CommandObjectParsed { } }; -static void SetError(CommandReturnObject &result, Error err) { - result.GetErrorStream().Printf("error: %s\n", - toString(std::move(err)).c_str()); - result.SetStatus(eReturnStatusFailed); -} - class CommandObjectReproducerDump : public CommandObjectParsed { public: CommandObjectReproducerDump(CommandInterpreter &interpreter) @@ -382,29 +411,11 @@ class CommandObjectReproducerDump : public CommandObjectParsed { return false; } - // If no reproducer path is specified, use the loader currently used for - // replay. Otherwise create a new loader just for dumping. llvm::Optional loader_storage; - Loader *loader = nullptr; - if (!m_options.file) { - loader = Reproducer::Instance().GetLoader(); - if (loader == nullptr) { - result.SetError( - "Not specifying a reproducer is only support during replay."); - result.SetStatus(eReturnStatusSuccessFinishNoResult); - return false; - } - } else { - loader_storage.emplace(m_options.file); - loader = &(*loader_storage); - if (Error err = loader->LoadIndex()) { - SetError(result, std::move(err)); - return false; - } - } - - // If we get here we should have a valid loader. 
- assert(loader); + Loader *loader = + GetLoaderFromPathOrCurrent(loader_storage, result, m_options.file); + if (!loader) + return false; switch (m_options.provider) { case eReproducerProviderFiles: { @@ -583,6 +594,101 @@ class CommandObjectReproducerDump : public CommandObjectParsed { CommandOptions m_options; }; +class CommandObjectReproducerVerify : public CommandObjectParsed { +public: + CommandObjectReproducerVerify(CommandInterpreter &interpreter) + : CommandObjectParsed(interpreter, "reproducer verify", + "Verify the contents of a reproducer. " + "If no reproducer is specified during replay, it " + "verifies the content of the current reproducer.", + nullptr) {} + + ~CommandObjectReproducerVerify() override = default; + + Options *GetOptions() override { return &m_options; } + + class CommandOptions : public Options { + public: + CommandOptions() : Options(), file() {} + + ~CommandOptions() override = default; + + Status SetOptionValue(uint32_t option_idx, StringRef option_arg, + ExecutionContext *execution_context) override { + Status error; + const int short_option = m_getopt_table[option_idx].val; + + switch (short_option) { + case 'f': + file.SetFile(option_arg, FileSpec::Style::native); + FileSystem::Instance().Resolve(file); + break; + default: + llvm_unreachable("Unimplemented option"); + } + + return error; + } + + void OptionParsingStarting(ExecutionContext *execution_context) override { + file.Clear(); + } + + ArrayRef GetDefinitions() override { + return makeArrayRef(g_reproducer_verify_options); + } + + FileSpec file; + }; + +protected: + bool DoExecute(Args &command, CommandReturnObject &result) override { + if (!command.empty()) { + result.AppendErrorWithFormat("'%s' takes no arguments", + m_cmd_name.c_str()); + return false; + } + + llvm::Optional loader_storage; + Loader *loader = + GetLoaderFromPathOrCurrent(loader_storage, result, m_options.file); + if (!loader) + return false; + + bool errors = false; + auto error_callback = 
[&](llvm::StringRef error) { + errors = true; + result.AppendError(error); + }; + + bool warnings = false; + auto warning_callback = [&](llvm::StringRef warning) { + warnings = true; + result.AppendWarning(warning); + }; + + auto note_callback = [&](llvm::StringRef warning) { + result.AppendMessage(warning); + }; + + Verifier verifier(loader); + verifier.Verify(error_callback, warning_callback, note_callback); + + if (warnings || errors) { + result.AppendMessage("reproducer verification failed"); + result.SetStatus(eReturnStatusFailed); + } else { + result.AppendMessage("reproducer verification succeeded"); + result.SetStatus(eReturnStatusSuccessFinishResult); + } + + return result.Succeeded(); + } + +private: + CommandOptions m_options; +}; + CommandObjectReproducer::CommandObjectReproducer( CommandInterpreter &interpreter) : CommandObjectMultiword( @@ -605,6 +711,8 @@ CommandObjectReproducer::CommandObjectReproducer( new CommandObjectReproducerStatus(interpreter))); LoadSubCommand("dump", CommandObjectSP(new CommandObjectReproducerDump(interpreter))); + LoadSubCommand("verify", CommandObjectSP( + new CommandObjectReproducerVerify(interpreter))); LoadSubCommand("xcrash", CommandObjectSP( new CommandObjectReproducerXCrash(interpreter))); } diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 4bfaf18ec3025..eacd6de1910c1 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -451,6 +451,12 @@ let Command = "reproducer dump" in { "provided, that reproducer is dumped.">; } +let Command = "reproducer verify" in { + def reproducer_verify_file : Option<"file", "f">, Group<1>, Arg<"Filename">, + Desc<"The reproducer path. 
If a reproducer is replayed and no path is " + "provided, that reproducer is dumped.">; +} + let Command = "reproducer xcrash" in { def reproducer_signal : Option<"signal", "s">, Group<1>, EnumArg<"None", "ReproducerSignalType()">, diff --git a/lldb/source/Utility/Reproducer.cpp b/lldb/source/Utility/Reproducer.cpp index 68c64195f55ee..1f9ab8d37174e 100644 --- a/lldb/source/Utility/Reproducer.cpp +++ b/lldb/source/Utility/Reproducer.cpp @@ -268,3 +268,94 @@ bool Loader::HasFile(StringRef file) { auto it = std::lower_bound(m_files.begin(), m_files.end(), file.str()); return (it != m_files.end()) && (*it == file); } + +void Verifier::Verify( + llvm::function_ref error_callback, + llvm::function_ref warning_callback, + llvm::function_ref note_callack) const { + if (!m_loader) { + error_callback("invalid loader"); + return; + } + + FileSpec vfs_mapping = m_loader->GetFile(); + ErrorOr> buffer = + vfs::getRealFileSystem()->getBufferForFile(vfs_mapping.GetPath()); + if (!buffer) { + error_callback("unable to read files: " + buffer.getError().message()); + return; + } + + IntrusiveRefCntPtr vfs = vfs::getVFSFromYAML( + std::move(buffer.get()), nullptr, vfs_mapping.GetPath()); + if (!vfs) { + error_callback("unable to initialize the virtual file system"); + return; + } + + auto &redirecting_vfs = static_cast(*vfs); + redirecting_vfs.setFallthrough(false); + + { + llvm::Expected working_dir = + GetDirectoryFrom(m_loader); + if (working_dir) { + if (!vfs->exists(*working_dir)) + warning_callback("working directory '" + *working_dir + "' not in VFS"); + vfs->setCurrentWorkingDirectory(*working_dir); + } else { + warning_callback("no working directory in reproducer: " + + toString(working_dir.takeError())); + } + } + + { + llvm::Expected home_dir = + GetDirectoryFrom(m_loader); + if (home_dir) { + if (!vfs->exists(*home_dir)) + warning_callback("home directory '" + *home_dir + "' not in VFS"); + } else { + warning_callback("no home directory in reproducer: " + + 
toString(home_dir.takeError())); + } + } + + { + Expected symbol_files = + m_loader->LoadBuffer(); + if (symbol_files) { + std::vector entries; + llvm::yaml::Input yin(*symbol_files); + yin >> entries; + for (const auto &entry : entries) { + if (!entry.module_path.empty() && !vfs->exists(entry.module_path)) { + warning_callback("'" + entry.module_path + "': module path for " + + entry.uuid + " not in VFS"); + } + if (!entry.symbol_path.empty() && !vfs->exists(entry.symbol_path)) { + warning_callback("'" + entry.symbol_path + "': symbol path for " + + entry.uuid + " not in VFS"); + } + } + } else { + llvm::consumeError(symbol_files.takeError()); + } + } + + // Missing files in the VFS are notes rather than warnings. Because the VFS + // is a snapshot, temporary files could have been removed between when they + // were recorded and when the reproducer was generated. + std::vector roots = redirecting_vfs.getRoots(); + for (llvm::StringRef root : roots) { + std::error_code ec; + vfs::recursive_directory_iterator iter(*vfs, root, ec); + vfs::recursive_directory_iterator end; + for (; iter != end && !ec; iter.increment(ec)) { + ErrorOr status = vfs->status(iter->path()); + if (!status) + note_callack("'" + iter->path().str() + + "': " + status.getError().message()); + } + } +} diff --git a/lldb/source/Utility/ReproducerProvider.cpp b/lldb/source/Utility/ReproducerProvider.cpp index f5556659390bf..d67c886708a2f 100644 --- a/lldb/source/Utility/ReproducerProvider.cpp +++ b/lldb/source/Utility/ReproducerProvider.cpp @@ -9,6 +9,7 @@ #include "lldb/Utility/ReproducerProvider.h" #include "lldb/Utility/ProcessInfo.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" using namespace lldb_private; diff --git a/lldb/test/Shell/Reproducer/TestDebugSymbols.test b/lldb/test/Shell/Reproducer/TestDebugSymbols.test index 6a3cc1249cbd1..986452ec35e86 100644 --- a/lldb/test/Shell/Reproducer/TestDebugSymbols.test +++ 
b/lldb/test/Shell/Reproducer/TestDebugSymbols.test @@ -12,3 +12,7 @@ # DUMP: uuid: AD52358C-94F8-3796-ADD6-B20FFAC00E5C # DUMP-NEXT: module path: /path/to/unstripped/executable # DUMP-NEXT: symbol path: /path/to/foo.dSYM/Contents/Resources/DWARF/foo + +# RUN: not %lldb -b -o 'reproducer verify -f %t.repro' 2>&1 | FileCheck %s --check-prefix VERIFY +# VERIFY: warning: '/path/to/unstripped/executable': module path for AD52358C-94F8-3796-ADD6-B20FFAC00E5C not in VFS +# VERIFY: warning: '/path/to/foo.dSYM/Contents/Resources/DWARF/foo': symbol path for AD52358C-94F8-3796-ADD6-B20FFAC00E5C not in VFS diff --git a/lldb/test/Shell/Reproducer/TestVerify.test b/lldb/test/Shell/Reproducer/TestVerify.test new file mode 100644 index 0000000000000..0b34e62aab558 --- /dev/null +++ b/lldb/test/Shell/Reproducer/TestVerify.test @@ -0,0 +1,27 @@ +# RUN: rm -rf %t.repro +# RUN: rm -rf %t.repro2 +# RUN: %clang_host %S/Inputs/simple.c -g -o %t.out +# RUN: %lldb -x -b -s %S/Inputs/GDBRemoteCapture.in --capture --capture-path %t.repro %t.out +# RUN: %lldb --replay %t.repro + +# RUN: echo "/bogus/home/dir" > %t.repro/home.txt +# RUN: echo "/bogus/current/working/dir" > %t.repro/cwd.txt + +# RUN: not %lldb -b -o 'reproducer verify -f %t.repro' 2>&1 | FileCheck %s +# CHECK: working directory '/bogus/current/working/dir' not in VFS +# CHECK: home directory '/bogus/home/dir' not in VFS + +# RUN: rm %t.repro/root/%S/Inputs/GDBRemoteCapture.in +# RUN: echo "CHECK: '%S/Inputs/GDBRemoteCapture.in': No such file or directory" > %t.check +# RUN: not %lldb -b -o 'reproducer verify -f %t.repro' 2>&1 | FileCheck %t.check + +# RUN: not %lldb --replay %t.repro 2>&1 | FileCheck %s + +# At this point the reproducer is too broken to ignore the verification issues. +# Capture a new reproducer and only change the home directory, which is +# recoverable as far as this test goes. 
+ +# RUN: %lldb -x -b -s %S/Inputs/GDBRemoteCapture.in --capture --capture-path %t.repro2 %t.out +# RUN: echo "/bogus/home/dir" > %t.repro2/home.txt +# RUN: %lldb --replay %t.repro2 --reproducer-no-verify 2>&1 | FileCheck %s --check-prefix NO-VERIFY +# NO-VERIFY-NOT: home directory '/bogus/home/dir' not in VFS diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index 3837d06ed8d81..79720ddd1bf60 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -800,9 +800,11 @@ static void printHelp(LLDBOptTable &table, llvm::StringRef tool_name) { llvm::Optional InitializeReproducer(llvm::StringRef argv0, opt::InputArgList &input_args) { if (auto *replay_path = input_args.getLastArg(OPT_replay)) { - const bool no_version_check = input_args.hasArg(OPT_no_version_check); + SBReplayOptions replay_options; + replay_options.SetCheckVersion(!input_args.hasArg(OPT_no_version_check)); + replay_options.SetVerify(!input_args.hasArg(OPT_no_verification)); if (const char *error = - SBReproducer::Replay(replay_path->getValue(), no_version_check)) { + SBReproducer::Replay(replay_path->getValue(), replay_options)) { WithColor::error() << "reproducer replay failed: " << error << '\n'; return 1; } diff --git a/lldb/tools/driver/Options.td b/lldb/tools/driver/Options.td index 96f696ec3ca6e..b3ffc2d694eff 100644 --- a/lldb/tools/driver/Options.td +++ b/lldb/tools/driver/Options.td @@ -234,6 +234,8 @@ def replay: Separate<["--", "-"], "replay">, HelpText<"Tells the debugger to replay a reproducer from .">; def no_version_check: F<"reproducer-no-version-check">, HelpText<"Disable the reproducer version check.">; +def no_verification: F<"reproducer-no-verify">, + HelpText<"Disable the reproducer verification.">; def no_generate_on_signal: F<"reproducer-no-generate-on-signal">, HelpText<"Don't generate reproducer when a signal is received.">; def generate_on_exit: F<"reproducer-generate-on-exit">, diff --git 
a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index af09c21085c5e..055c0e5dd86f3 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -749,6 +749,10 @@ class RedirectingFileSystem : public vfs::FileSystem { StringRef getExternalContentsPrefixDir() const; + void setFallthrough(bool Fallthrough); + + std::vector getRoots() const; + void dump(raw_ostream &OS) const; void dumpEntry(raw_ostream &OS, Entry *E, int NumSpaces = 0) const; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 5b757c9ea80db..bbde44c30caaa 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -1159,6 +1159,17 @@ StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const { return ExternalContentsPrefixDir; } +void RedirectingFileSystem::setFallthrough(bool Fallthrough) { + IsFallthrough = Fallthrough; +} + +std::vector RedirectingFileSystem::getRoots() const { + std::vector R; + for (const auto &Root : Roots) + R.push_back(Root->getName()); + return R; +} + void RedirectingFileSystem::dump(raw_ostream &OS) const { for (const auto &Root : Roots) dumpEntry(OS, Root.get()); From 8d35080ebbea477316159a5af7d925bb51d805d0 Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Wed, 2 Sep 2020 14:02:40 +0000 Subject: [PATCH 076/465] [mlir][Linalg] Wrong tile size for convolutions fixed Sizes of tiles (subviews) are bigger by 1 than they should. Let's consider 1D convolution without batches or channels. Furthermore let m iterate over the output and n over the kernel then input is accessed with m + n. 
In tiling subview sizes for convolutions are computed by applying requested tile size together with kernel size to the above mentioned expression thus let's say for tile size of 2 the subview size is 2 + size(n), which is bigger by one than it should since we move kernel only once. The problem behind it is that range is not turned into closed interval before the composition. This commit fixes the problem by turning ranges first into closed intervals by substracting 1 and after the composition back to half open by adding 1. Differential Revision: https://reviews.llvm.org/D86638 --- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 8 ++++++-- mlir/test/Dialect/Linalg/tile_conv.mlir | 2 +- mlir/test/Dialect/Linalg/tile_simple_conv.mlir | 6 +++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 6dc98628850f9..daaad2e6fa4be 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -243,7 +243,9 @@ static SmallVector makeTiledViews(OpBuilder &b, Location loc, for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) { bool isTiled = !isZero(tileSizes[idx]); lbs.push_back(isTiled ? ivs[idxIvs++] : (Value)std_constant_index(0)); - subViewSizes.push_back(isTiled ? tileSizes[idx] : viewSizes[idx]); + // Before composing, we need to make range a closed interval. + Value size = isTiled ? 
tileSizes[idx] : viewSizes[idx]; + subViewSizes.push_back(size - std_constant_index(1)); } auto *op = linalgOp.getOperation(); @@ -282,7 +284,9 @@ static SmallVector makeTiledViews(OpBuilder &b, Location loc, auto m = map.getSubMap({r}); auto offset = applyMapToValues(b, loc, m, lbs).front(); offsets.push_back(offset); - auto size = applyMapToValues(b, loc, m, subViewSizes).front(); + auto closedIntSize = applyMapToValues(b, loc, m, subViewSizes).front(); + // Resulting size needs to be made half open interval again. + auto size = closedIntSize + std_constant_index(1); // The size of the subview should be trimmed to avoid out-of-bounds // accesses, unless we statically know the subview size divides the view diff --git a/mlir/test/Dialect/Linalg/tile_conv.mlir b/mlir/test/Dialect/Linalg/tile_conv.mlir index a08a2f1e585c6..3b76f8a3139c1 100644 --- a/mlir/test/Dialect/Linalg/tile_conv.mlir +++ b/mlir/test/Dialect/Linalg/tile_conv.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 // TILE-23004-DAG: #[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)> -// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 90, d0 * -30 + s1)> +// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s1)> // TILE-23004-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> // TILE-23004-DAG: #[[$bound_map_4:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> diff --git a/mlir/test/Dialect/Linalg/tile_simple_conv.mlir b/mlir/test/Dialect/Linalg/tile_simple_conv.mlir index f854f7570fef3..b71f4bc0d3a8f 100644 --- a/mlir/test/Dialect/Linalg/tile_simple_conv.mlir +++ b/mlir/test/Dialect/Linalg/tile_simple_conv.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" | FileCheck %s // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> -// CHECK-DAG: #[[MAP1:.*]] 
= affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s1)> -// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 4, -d0 + s1)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s1)> +// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s1)> // CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)> // CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> @@ -46,4 +46,4 @@ func @conv(%arg0 : memref, %arg1 : memref, %arg2 : mem // CHECK: %[[T19:.*]] = dim %[[ARG2]], %[[C3]] // CHECK: %[[SV2:.*]] = subview %[[ARG2]][%[[ARG3]], %[[ARG4]], %[[ARG5]], 0] // CHECK-SAME: [%[[T14]], %[[T16]], %[[T18]], %[[T19]]] -// CHECK: linalg.conv(%[[ARG0]], %[[SV1]], %[[SV2]]) \ No newline at end of file +// CHECK: linalg.conv(%[[ARG0]], %[[SV1]], %[[SV2]]) From c0b6bc070e78cbd20bc4351704f52d85192e8804 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 3 Sep 2020 05:57:57 +0000 Subject: [PATCH 077/465] Decouple OpPassManager from the the MLIRContext (NFC) This is allowing to build an OpPassManager from a StringRef instead of an Identifier, which enables building pipelines without an MLIRContext. An identifier is still cached on-demand on the OpPassManager for efficiency during the IR traversal. --- mlir/include/mlir/Pass/PassManager.h | 16 +++-- mlir/lib/Pass/Pass.cpp | 92 ++++++++++++++++++---------- mlir/lib/Pass/PassStatistics.cpp | 2 +- 3 files changed, 71 insertions(+), 39 deletions(-) diff --git a/mlir/include/mlir/Pass/PassManager.h b/mlir/include/mlir/Pass/PassManager.h index 8addd9809f90a..ec88485cd3efa 100644 --- a/mlir/include/mlir/Pass/PassManager.h +++ b/mlir/include/mlir/Pass/PassManager.h @@ -47,7 +47,8 @@ struct OpPassManagerImpl; /// other OpPassManagers or the top-level PassManager. 
class OpPassManager { public: - OpPassManager(Identifier name, MLIRContext *context, bool verifyPasses); + OpPassManager(Identifier name, bool verifyPasses); + OpPassManager(StringRef name, bool verifyPasses); OpPassManager(OpPassManager &&rhs); OpPassManager(const OpPassManager &rhs); ~OpPassManager(); @@ -73,7 +74,7 @@ class OpPassManager { OpPassManager &nest(Identifier nestedName); OpPassManager &nest(StringRef nestedName); template OpPassManager &nest() { - return nest(Identifier::get(OpT::getOperationName(), getContext())); + return nest(OpT::getOperationName()); } /// Add the given pass to this pass manager. If this pass has a concrete @@ -89,11 +90,11 @@ class OpPassManager { /// Returns the number of passes held by this manager. size_t size() const; - /// Return an instance of the context. - MLIRContext *getContext() const; + /// Return the operation name that this pass manager operates on. + Identifier getOpName(MLIRContext &context) const; /// Return the operation name that this pass manager operates on. - Identifier getOpName() const; + StringRef getOpName() const; /// Returns the internal implementation instance. detail::OpPassManagerImpl &getImpl(); @@ -151,6 +152,9 @@ class PassManager : public OpPassManager { LLVM_NODISCARD LogicalResult run(ModuleOp module); + /// Return an instance of the context. + MLIRContext *getContext() const { return context; } + /// Enable support for the pass manager to generate a reproducer on the event /// of a crash or a pass failure. `outputFile` is a .mlir filename used to /// write the generated reproducer. If `genLocalReproducer` is true, the pass @@ -304,6 +308,8 @@ class PassManager : public OpPassManager { runWithCrashRecovery(MutableArrayRef> passes, ModuleOp module, AnalysisManager am); + MLIRContext *context; + /// Flag that specifies if pass statistics should be dumped. 
Optional passStatisticsMode; diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index d3cf62574afda..3ac41cde7911b 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -92,8 +92,10 @@ void VerifierPass::runOnOperation() { namespace mlir { namespace detail { struct OpPassManagerImpl { - OpPassManagerImpl(Identifier name, MLIRContext *ctx, bool verifyPasses) - : name(name), context(ctx), verifyPasses(verifyPasses) {} + OpPassManagerImpl(Identifier identifier, bool verifyPasses) + : name(identifier), identifier(identifier), verifyPasses(verifyPasses) {} + OpPassManagerImpl(StringRef name, bool verifyPasses) + : name(name), verifyPasses(verifyPasses) {} /// Merge the passes of this pass manager into the one provided. void mergeInto(OpPassManagerImpl &rhs); @@ -101,9 +103,7 @@ struct OpPassManagerImpl { /// Nest a new operation pass manager for the given operation kind under this /// pass manager. OpPassManager &nest(Identifier nestedName); - OpPassManager &nest(StringRef nestedName) { - return nest(Identifier::get(nestedName, getContext())); - } + OpPassManager &nest(StringRef nestedName); /// Add the given pass to this pass manager. If this pass has a concrete /// operation type, it must be the same type as this pass manager. @@ -117,14 +117,18 @@ struct OpPassManagerImpl { /// pass. void splitAdaptorPasses(); - /// Return an instance of the context. - MLIRContext *getContext() const { return context; } + Identifier getOpName(MLIRContext &context) { + if (!identifier) + identifier = Identifier::get(name, &context); + return *identifier; + } /// The name of the operation that passes of this pass manager operate on. - Identifier name; + StringRef name; - /// The current context for this pass manager - MLIRContext *context; + /// The cached identifier (internalized in the context) for the name of the + /// operation that passes of this pass manager operate on. 
+ Optional identifier; /// Flag that specifies if the IR should be verified after each pass has run. bool verifyPasses : 1; @@ -143,7 +147,14 @@ void OpPassManagerImpl::mergeInto(OpPassManagerImpl &rhs) { } OpPassManager &OpPassManagerImpl::nest(Identifier nestedName) { - OpPassManager nested(nestedName, getContext(), verifyPasses); + OpPassManager nested(nestedName, verifyPasses); + auto *adaptor = new OpToOpPassAdaptor(std::move(nested)); + addPass(std::unique_ptr(adaptor)); + return adaptor->getPassManagers().front(); +} + +OpPassManager &OpPassManagerImpl::nest(StringRef nestedName) { + OpPassManager nested(nestedName, verifyPasses); auto *adaptor = new OpToOpPassAdaptor(std::move(nested)); addPass(std::unique_ptr(adaptor)); return adaptor->getPassManagers().front(); @@ -153,7 +164,7 @@ void OpPassManagerImpl::addPass(std::unique_ptr pass) { // If this pass runs on a different operation than this pass manager, then // implicitly nest a pass manager for this operation. auto passOpName = pass->getOpName(); - if (passOpName && passOpName != name.strref()) + if (passOpName && passOpName != name) return nest(*passOpName).addPass(std::move(pass)); passes.emplace_back(std::move(pass)); @@ -240,14 +251,14 @@ void OpPassManagerImpl::splitAdaptorPasses() { // OpPassManager //===----------------------------------------------------------------------===// -OpPassManager::OpPassManager(Identifier name, MLIRContext *context, - bool verifyPasses) - : impl(new OpPassManagerImpl(name, context, verifyPasses)) {} +OpPassManager::OpPassManager(Identifier name, bool verifyPasses) + : impl(new OpPassManagerImpl(name, verifyPasses)) {} +OpPassManager::OpPassManager(StringRef name, bool verifyPasses) + : impl(new OpPassManagerImpl(name, verifyPasses)) {} OpPassManager::OpPassManager(OpPassManager &&rhs) : impl(std::move(rhs.impl)) {} OpPassManager::OpPassManager(const OpPassManager &rhs) { *this = rhs; } OpPassManager &OpPassManager::operator=(const OpPassManager &rhs) { - 
impl.reset(new OpPassManagerImpl(rhs.impl->name, rhs.impl->getContext(), - rhs.impl->verifyPasses)); + impl.reset(new OpPassManagerImpl(rhs.impl->name, rhs.impl->verifyPasses)); for (auto &pass : rhs.impl->passes) impl->passes.emplace_back(pass->clone()); return *this; @@ -290,11 +301,13 @@ size_t OpPassManager::size() const { return impl->passes.size(); } /// Returns the internal implementation instance. OpPassManagerImpl &OpPassManager::getImpl() { return *impl; } -/// Return an instance of the context. -MLIRContext *OpPassManager::getContext() const { return impl->getContext(); } +/// Return the operation name that this pass manager operates on. +StringRef OpPassManager::getOpName() const { return impl->name; } /// Return the operation name that this pass manager operates on. -Identifier OpPassManager::getOpName() const { return impl->name; } +Identifier OpPassManager::getOpName(MLIRContext &context) const { + return impl->getOpName(context); +} /// Prints out the given passes as the textual representation of a pipeline. static void printAsTextualPipeline(ArrayRef> passes, @@ -389,12 +402,22 @@ LogicalResult OpToOpPassAdaptor::runPipeline( /// Find an operation pass manager that can operate on an operation of the given /// type, or nullptr if one does not exist. static OpPassManager *findPassManagerFor(MutableArrayRef mgrs, - Identifier name) { + StringRef name) { auto it = llvm::find_if( mgrs, [&](OpPassManager &mgr) { return mgr.getOpName() == name; }); return it == mgrs.end() ? nullptr : &*it; } +/// Find an operation pass manager that can operate on an operation of the given +/// type, or nullptr if one does not exist. +static OpPassManager *findPassManagerFor(MutableArrayRef mgrs, + Identifier name, + MLIRContext &context) { + auto it = llvm::find_if( + mgrs, [&](OpPassManager &mgr) { return mgr.getOpName(context) == name; }); + return it == mgrs.end() ? 
nullptr : &*it; +} + OpToOpPassAdaptor::OpToOpPassAdaptor(OpPassManager &&mgr) { mgrs.emplace_back(std::move(mgr)); } @@ -421,8 +444,7 @@ void OpToOpPassAdaptor::mergeInto(OpToOpPassAdaptor &rhs) { // After coalescing, sort the pass managers within rhs by name. llvm::array_pod_sort(rhs.mgrs.begin(), rhs.mgrs.end(), [](const OpPassManager *lhs, const OpPassManager *rhs) { - return lhs->getOpName().strref().compare( - rhs->getOpName().strref()); + return lhs->getOpName().compare(rhs->getOpName()); }); } @@ -454,16 +476,18 @@ void OpToOpPassAdaptor::runOnOperationImpl() { for (auto ®ion : getOperation()->getRegions()) { for (auto &block : region) { for (auto &op : block) { - auto *mgr = findPassManagerFor(mgrs, op.getName().getIdentifier()); + auto *mgr = findPassManagerFor(mgrs, op.getName().getIdentifier(), + *op.getContext()); if (!mgr) continue; + Identifier opName = mgr->getOpName(*getOperation()->getContext()); // Run the held pipeline over the current operation. if (instrumentor) - instrumentor->runBeforePipeline(mgr->getOpName(), parentInfo); + instrumentor->runBeforePipeline(opName, parentInfo); auto result = runPipeline(mgr->getPasses(), &op, am.nest(&op)); if (instrumentor) - instrumentor->runAfterPipeline(mgr->getOpName(), parentInfo); + instrumentor->runAfterPipeline(opName, parentInfo); if (failed(result)) return signalPassFailure(); @@ -499,7 +523,8 @@ void OpToOpPassAdaptor::runOnOperationAsyncImpl() { for (auto &block : region) { for (auto &op : block) { // Add this operation iff the name matches any of the pass managers. - if (findPassManagerFor(mgrs, op.getName().getIdentifier())) + if (findPassManagerFor(mgrs, op.getName().getIdentifier(), + getContext())) opAMPairs.emplace_back(&op, am.nest(&op)); } } @@ -535,16 +560,17 @@ void OpToOpPassAdaptor::runOnOperationAsyncImpl() { // Get the pass manager for this operation and execute it. 
auto &it = opAMPairs[nextID]; - auto *pm = - findPassManagerFor(pms, it.first->getName().getIdentifier()); + auto *pm = findPassManagerFor( + pms, it.first->getName().getIdentifier(), getContext()); assert(pm && "expected valid pass manager for operation"); + Identifier opName = pm->getOpName(*getOperation()->getContext()); if (instrumentor) - instrumentor->runBeforePipeline(pm->getOpName(), parentInfo); + instrumentor->runBeforePipeline(opName, parentInfo); auto pipelineResult = runPipeline(pm->getPasses(), it.first, it.second); if (instrumentor) - instrumentor->runAfterPipeline(pm->getOpName(), parentInfo); + instrumentor->runAfterPipeline(opName, parentInfo); // Drop this thread from being tracked by the diagnostic handler. // After this task has finished, the thread may be used outside of @@ -737,9 +763,9 @@ PassManager::runWithCrashRecovery(MutableArrayRef> passes, //===----------------------------------------------------------------------===// PassManager::PassManager(MLIRContext *ctx, bool verifyPasses) - : OpPassManager(Identifier::get(ModuleOp::getOperationName(), ctx), ctx, + : OpPassManager(Identifier::get(ModuleOp::getOperationName(), ctx), verifyPasses), - passTiming(false), localReproducer(false) {} + context(ctx), passTiming(false), localReproducer(false) {} PassManager::~PassManager() {} diff --git a/mlir/lib/Pass/PassStatistics.cpp b/mlir/lib/Pass/PassStatistics.cpp index 3721230b6913d..d909c98abf563 100644 --- a/mlir/lib/Pass/PassStatistics.cpp +++ b/mlir/lib/Pass/PassStatistics.cpp @@ -116,7 +116,7 @@ static void printResultsAsPipeline(raw_ostream &os, OpPassManager &pm) { // Print each of the children passes. 
for (OpPassManager &mgr : mgrs) { - auto name = ("'" + mgr.getOpName().strref() + "' Pipeline").str(); + auto name = ("'" + mgr.getOpName() + "' Pipeline").str(); printPassEntry(os, indent, name); for (Pass &pass : mgr.getPasses()) printPass(indent + 2, &pass); From 9e9e6e698d8ef5dc5b7576058f2022aab2534a52 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 1 Sep 2020 15:43:38 +0200 Subject: [PATCH 078/465] Adding GDB PrettyPrinter for mlir::Identifier. This is the first bit from D73546. Primarily setting up the corresponding test. Will add more pretty printers in a separate revision. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D86937 --- debuginfo-tests/CMakeLists.txt | 7 ++++++ debuginfo-tests/lit.cfg.py | 1 + debuginfo-tests/lit.site.cfg.py.in | 2 ++ .../llvm-prettyprinters/gdb/mlir-support.cpp | 8 +++++++ .../llvm-prettyprinters/gdb/mlir-support.gdb | 8 +++++++ mlir/utils/gdb-scripts/prettyprinters.py | 22 +++++++++++++++++++ 6 files changed, 48 insertions(+) create mode 100644 debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp create mode 100644 debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb create mode 100644 mlir/utils/gdb-scripts/prettyprinters.py diff --git a/debuginfo-tests/CMakeLists.txt b/debuginfo-tests/CMakeLists.txt index d3ac0a4aad90a..3750f2e0b026b 100644 --- a/debuginfo-tests/CMakeLists.txt +++ b/debuginfo-tests/CMakeLists.txt @@ -9,6 +9,12 @@ add_llvm_executable(check-gdb-llvm-support ) target_link_libraries(check-gdb-llvm-support PRIVATE LLVMSupport) +add_llvm_executable(check-gdb-mlir-support + llvm-prettyprinters/gdb/mlir-support.cpp +) +target_include_directories(check-gdb-mlir-support PRIVATE ${LLVM_EXTERNAL_MLIR_SOURCE_DIR}/include) +target_link_libraries(check-gdb-mlir-support PRIVATE MLIRIR) + set(DEBUGINFO_TESTS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(DEBUGINFO_TESTS_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) @@ -19,6 +25,7 @@ set(DEBUGINFO_TEST_DEPS llvm-config llvm-objdump 
check-gdb-llvm-support + check-gdb-mlir-support not ) diff --git a/debuginfo-tests/lit.cfg.py b/debuginfo-tests/lit.cfg.py index 4c45b723d2e9a..ba09b1f4cfae4 100644 --- a/debuginfo-tests/lit.cfg.py +++ b/debuginfo-tests/lit.cfg.py @@ -46,6 +46,7 @@ config.debuginfo_tests_src_root, 'llgdb-tests', 'test_debuginfo.pl')), ToolSubst("%llvm_src_root", config.llvm_src_root), ToolSubst("%llvm_tools_dir", config.llvm_tools_dir), + ToolSubst("%mlir_src_root", config.mlir_src_root), ] def get_required_attr(config, attr_name): diff --git a/debuginfo-tests/lit.site.cfg.py.in b/debuginfo-tests/lit.site.cfg.py.in index d5893f577aed0..4ed49b83bc35f 100644 --- a/debuginfo-tests/lit.site.cfg.py.in +++ b/debuginfo-tests/lit.site.cfg.py.in @@ -20,6 +20,8 @@ config.target_triple = "@TARGET_TRIPLE@" config.host_arch = "@HOST_ARCH@" config.is_msvc = lit.util.pythonize_bool("@MSVC@") +config.mlir_src_root = "@MLIR_SOURCE_DIR@" + config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.python3_executable = "@Python3_EXECUTABLE@" diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp b/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp new file mode 100644 index 0000000000000..d38fc3d03ad30 --- /dev/null +++ b/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp @@ -0,0 +1,8 @@ +#include "mlir/IR/Identifier.h" +#include "mlir/IR/MLIRContext.h" + +mlir::MLIRContext Context; + +auto Identifier = mlir::Identifier::get("foo", &Context); + +int main() { return 0; } diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb b/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb new file mode 100644 index 0000000000000..cca435e640e09 --- /dev/null +++ b/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb @@ -0,0 +1,8 @@ +# RUN: gdb -q -batch -n -iex 'source %mlir_src_root/utils/gdb-scripts/prettyprinters.py' -x %s %llvm_tools_dir/check-gdb-mlir-support | FileCheck %s --dump-input=fail +# REQUIRES: debug-info + +break main +run + +# CHECK: "foo" +p 
Identifier diff --git a/mlir/utils/gdb-scripts/prettyprinters.py b/mlir/utils/gdb-scripts/prettyprinters.py new file mode 100644 index 0000000000000..5ea20aca83d88 --- /dev/null +++ b/mlir/utils/gdb-scripts/prettyprinters.py @@ -0,0 +1,22 @@ +"""GDB pretty printers for MLIR types.""" + +import gdb.printing + +class IdentifierPrinter: + """Prints an mlir::Identifier instance.""" + + def __init__(self, val): + self.entry = val['entry'] + + def to_string(self): + ptr = (self.entry + 1).cast(gdb.lookup_type('char').pointer()); + return ptr.string(length=self.entry['keyLength']) + + def display_hint(self): + return 'string' + +pp = gdb.printing.RegexpCollectionPrettyPrinter('MLIRSupport') + +pp.add_printer('mlir::Identifier', '^mlir::Identifier$', IdentifierPrinter) + +gdb.printing.register_pretty_printer(gdb.current_objfile(), pp) From 2878ecc90f1f22cf0b96c04a4124122db008a2a9 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Wed, 2 Sep 2020 23:17:48 -0700 Subject: [PATCH 079/465] [StackProtector] Fix crash with vararg due to not checking LocationSize validity. Differential Revision: https://reviews.llvm.org/D87074 --- llvm/lib/CodeGen/StackProtector.cpp | 3 ++- .../CodeGen/X86/stack-guard-memloc-vararg.ll | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/stack-guard-memloc-vararg.ll diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index e246c2e5f55cb..3d961af8ec3e3 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -170,7 +170,8 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, // If this instruction accesses memory make sure it doesn't access beyond // the bounds of the allocated object. 
Optional MemLoc = MemoryLocation::getOrNone(I); - if (MemLoc.hasValue() && MemLoc->Size.getValue() > AllocSize) + if (MemLoc.hasValue() && MemLoc->Size.hasValue() && + MemLoc->Size.getValue() > AllocSize) return true; switch (I->getOpcode()) { case Instruction::Store: diff --git a/llvm/test/CodeGen/X86/stack-guard-memloc-vararg.ll b/llvm/test/CodeGen/X86/stack-guard-memloc-vararg.ll new file mode 100644 index 0000000000000..d881b6cfae3b1 --- /dev/null +++ b/llvm/test/CodeGen/X86/stack-guard-memloc-vararg.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=x86_64 -O0 < %s | FileCheck %s + +; Check that we don't crash on this input. +; CHECK-LABEL: @foo +; CHECK: __stack_chk_guard +; CHECK: retq +define hidden void @foo(i8** %ptr) #0 { +entry: + %args.addr = alloca i8*, align 8 + %0 = va_arg i8** %args.addr, i8* + store i8* %0, i8** %ptr + ret void +} + +attributes #0 = { sspstrong } +attributes #1 = { optsize } + From f0699d9109143754088c26604c58f5ab3e9d4678 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 3 Sep 2020 09:17:03 +0200 Subject: [PATCH 080/465] [debugserver] Fix that debugserver's stop reply packets always return signal code 0 If our process terminates due to an unhandled signal, we are supposed to get the signal code via WTERMSIG. However, we instead try to get the exit status via WEXITSTATUS which just ends up always calculating signal code 0 (at least on the macOS implementation where it just shifts the signal code bits away and we're left with only 0 bits). The exit status calculation on the LLDB side also seems a bit off as it claims an exit status that is just the signal code (instead of for example 128 + signal code), but that will be another patch. 
Reviewed By: jasonmolenda Differential Revision: https://reviews.llvm.org/D86336 --- lldb/test/Shell/Process/Inputs/abort.c | 3 +++ lldb/test/Shell/Process/TestAbortExitCode.test | 6 ++++++ lldb/tools/debugserver/source/RNBRemote.cpp | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 lldb/test/Shell/Process/Inputs/abort.c create mode 100644 lldb/test/Shell/Process/TestAbortExitCode.test diff --git a/lldb/test/Shell/Process/Inputs/abort.c b/lldb/test/Shell/Process/Inputs/abort.c new file mode 100644 index 0000000000000..9edc9336dc3e0 --- /dev/null +++ b/lldb/test/Shell/Process/Inputs/abort.c @@ -0,0 +1,3 @@ +#include + +int main(int argc, char **argv) { abort(); } diff --git a/lldb/test/Shell/Process/TestAbortExitCode.test b/lldb/test/Shell/Process/TestAbortExitCode.test new file mode 100644 index 0000000000000..a61c095051124 --- /dev/null +++ b/lldb/test/Shell/Process/TestAbortExitCode.test @@ -0,0 +1,6 @@ +UNSUPPORTED: system-windows + +RUN: %clang_host %p/Inputs/abort.c -o %t +RUN: %lldb %t -o run -o continue | FileCheck %s + +CHECK: status = 6 (0x00000006) Terminated due to signal 6 diff --git a/lldb/tools/debugserver/source/RNBRemote.cpp b/lldb/tools/debugserver/source/RNBRemote.cpp index 5e2512731f39c..b66cc8f583e8e 100644 --- a/lldb/tools/debugserver/source/RNBRemote.cpp +++ b/lldb/tools/debugserver/source/RNBRemote.cpp @@ -3066,7 +3066,7 @@ rnb_err_t RNBRemote::HandlePacket_last_signal(const char *unused) { WEXITSTATUS(pid_status)); else if (WIFSIGNALED(pid_status)) snprintf(pid_exited_packet, sizeof(pid_exited_packet), "X%02x", - WEXITSTATUS(pid_status)); + WTERMSIG(pid_status)); else if (WIFSTOPPED(pid_status)) snprintf(pid_exited_packet, sizeof(pid_exited_packet), "S%02x", WSTOPSIG(pid_status)); From 9124fa592098d3794d7b31f83a58e40cc469ff0c Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 3 Sep 2020 09:36:12 +0200 Subject: [PATCH 081/465] Fix broken HUGE_VALF macro in llvm-c/DataTypes.h Commit 
3a29393b4709d15069130119cf1d136af4a92d77 removes the cmath/math.h includes from the DataTypes.h header to speed up parsing. However the DataTypes.h header was using this header to get the macro `HUGE_VAL` for its own `HUGE_VALF` macro definition. Now the macro instead just expands into a plain `HUGE_VAL` token which leads to compiler errors unless `math.h` was previously included by the including source file. It also leads to compiler warnings with enabled module builds which point out this inconsistency. The correct way to fix this seems to be to just remove HUGE_VALF from the header. llvm-c is not referencing that macro from what I can see and users probably should just include the math headers if they need it (or define it on their own for really old C versions). Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D83761 --- llvm/include/llvm-c/DataTypes.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/include/llvm-c/DataTypes.h b/llvm/include/llvm-c/DataTypes.h index 0f27ba81865e0..4eb0ac97d97e5 100644 --- a/llvm/include/llvm-c/DataTypes.h +++ b/llvm/include/llvm-c/DataTypes.h @@ -77,8 +77,4 @@ typedef signed int ssize_t; # define UINT64_MAX 0xffffffffffffffffULL #endif -#ifndef HUGE_VALF -#define HUGE_VALF (float)HUGE_VAL -#endif - #endif /* LLVM_C_DATATYPES_H */ From e123959e94716ef6b5942060ac5934f696eaa3d3 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 3 Sep 2020 09:54:37 +0200 Subject: [PATCH 082/465] [lldb] Remove debugserver specific string from TestAbortExitCode check The test only checks the exit code that the debug server sends back, but not the following explanation which is different for debugserver and lldb-server. 
--- lldb/test/Shell/Process/TestAbortExitCode.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/Shell/Process/TestAbortExitCode.test b/lldb/test/Shell/Process/TestAbortExitCode.test index a61c095051124..5be0a15ab1728 100644 --- a/lldb/test/Shell/Process/TestAbortExitCode.test +++ b/lldb/test/Shell/Process/TestAbortExitCode.test @@ -3,4 +3,4 @@ UNSUPPORTED: system-windows RUN: %clang_host %p/Inputs/abort.c -o %t RUN: %lldb %t -o run -o continue | FileCheck %s -CHECK: status = 6 (0x00000006) Terminated due to signal 6 +CHECK: status = 6 (0x00000006) From f5e2ea9a43221be9576056c4912796cf37331cd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sun, 16 Aug 2020 00:26:24 +0300 Subject: [PATCH 083/465] [AArch64] Add asm directives for the remaining SEH unwind codes Add support in llvm-readobj for displaying them and support in the asm parsser, AArch64TargetStreamer and MCWin64EH for emitting them. The directives for the remaining basic opcodes have names that match the opcode in the documentation. The directives for custom stack cases, that are named MSFT_OP_TRAP_FRAME, MSFT_OP_MACHINE_FRAME, MSFT_OP_CONTEXT and MSFT_OP_CLEAR_UNWOUND_TO_CALL, are given matching assembler directive names that fit into the rest of the opcode naming; .seh_trap_frame, .seh_context, .seh_clear_unwound_to_call The opcode MSFT_OP_MACHINE_FRAME is mapped to the existing opecode enum UOP_PushMachFrame that is used on x86_64, and also uses the corresponding existing x86_64 directive name .seh_pushframe. 
Differential Revision: https://reviews.llvm.org/D86889 --- llvm/include/llvm/Support/Win64EH.h | 8 +- llvm/lib/MC/MCWin64EH.cpp | 56 +++++++++++++ .../AArch64/AsmParser/AArch64AsmParser.cpp | 82 +++++++++++++++++++ .../MCTargetDesc/AArch64ELFStreamer.cpp | 13 +++ .../MCTargetDesc/AArch64TargetStreamer.h | 15 ++++ .../MCTargetDesc/AArch64WinCOFFStreamer.cpp | 29 +++++++ llvm/test/MC/AArch64/seh.s | 31 +++++-- llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp | 46 +++++++++++ llvm/tools/llvm-readobj/ARMWinEHPrinter.h | 8 ++ 9 files changed, 282 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Support/Win64EH.h b/llvm/include/llvm/Support/Win64EH.h index 8220131e5be92..9359fcb4286a9 100644 --- a/llvm/include/llvm/Support/Win64EH.h +++ b/llvm/include/llvm/Support/Win64EH.h @@ -38,12 +38,14 @@ enum UnwindOpcodes { // The following set of unwind opcodes is for ARM64. They are documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling UOP_AllocMedium, + UOP_SaveR19R20X, UOP_SaveFPLRX, UOP_SaveFPLR, UOP_SaveReg, UOP_SaveRegX, UOP_SaveRegP, UOP_SaveRegPX, + UOP_SaveLRPair, UOP_SaveFReg, UOP_SaveFRegX, UOP_SaveFRegP, @@ -51,7 +53,11 @@ enum UnwindOpcodes { UOP_SetFP, UOP_AddFP, UOP_Nop, - UOP_End + UOP_End, + UOP_SaveNext, + UOP_TrapFrame, + UOP_Context, + UOP_ClearUnwoundToCall }; /// UnwindCode - This union describes a single operation in a function prolog, diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp index d81687295bd04..fb0de40fc6d5f 100644 --- a/llvm/lib/MC/MCWin64EH.cpp +++ b/llvm/lib/MC/MCWin64EH.cpp @@ -280,6 +280,9 @@ ARM64CountOfUnwindCodes(const std::vector &Insns) { case Win64EH::UOP_AllocLarge: Count += 4; break; + case Win64EH::UOP_SaveR19R20X: + Count += 1; + break; case Win64EH::UOP_SaveFPLRX: Count += 1; break; @@ -298,6 +301,9 @@ ARM64CountOfUnwindCodes(const std::vector &Insns) { case Win64EH::UOP_SaveRegX: Count += 2; break; + case Win64EH::UOP_SaveLRPair: + Count += 2; + break; case 
Win64EH::UOP_SaveFReg: Count += 2; break; @@ -322,6 +328,21 @@ ARM64CountOfUnwindCodes(const std::vector &Insns) { case Win64EH::UOP_End: Count += 1; break; + case Win64EH::UOP_SaveNext: + Count += 1; + break; + case Win64EH::UOP_TrapFrame: + Count += 1; + break; + case Win64EH::UOP_PushMachFrame: + Count += 1; + break; + case Win64EH::UOP_Context: + Count += 1; + break; + case Win64EH::UOP_ClearUnwoundToCall: + Count += 1; + break; } } return Count; @@ -375,6 +396,11 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, b = 0xE3; streamer.emitInt8(b); break; + case Win64EH::UOP_SaveR19R20X: + b = 0x20; + b |= (inst.Offset >> 3) & 0x1F; + streamer.emitInt8(b); + break; case Win64EH::UOP_SaveFPLRX: b = 0x80; b |= ((inst.Offset - 1) >> 3) & 0x3F; @@ -417,6 +443,16 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, b = ((reg & 0x3) << 6) | ((inst.Offset >> 3) - 1); streamer.emitInt8(b); break; + case Win64EH::UOP_SaveLRPair: + assert(inst.Register >= 19 && "Saved reg must be >= 19"); + reg = inst.Register - 19; + assert((reg % 2) == 0 && "Saved reg must be 19+2*X"); + reg /= 2; + b = 0xD6 | ((reg & 0x7) >> 2); + streamer.emitInt8(b); + b = ((reg & 0x3) << 6) | (inst.Offset >> 3); + streamer.emitInt8(b); + break; case Win64EH::UOP_SaveFReg: assert(inst.Register >= 8 && "Saved dreg must be >= 8"); reg = inst.Register - 8; @@ -453,6 +489,26 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, b = 0xE4; streamer.emitInt8(b); break; + case Win64EH::UOP_SaveNext: + b = 0xE6; + streamer.emitInt8(b); + break; + case Win64EH::UOP_TrapFrame: + b = 0xE8; + streamer.emitInt8(b); + break; + case Win64EH::UOP_PushMachFrame: + b = 0xE9; + streamer.emitInt8(b); + break; + case Win64EH::UOP_Context: + b = 0xEA; + streamer.emitInt8(b); + break; + case Win64EH::UOP_ClearUnwoundToCall: + b = 0xEC; + streamer.emitInt8(b); + break; } } diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp 
b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index a74d15de25566..08a29bbb3e87a 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -186,12 +186,14 @@ class AArch64AsmParser : public MCTargetAsmParser { bool parseDirectiveSEHAllocStack(SMLoc L); bool parseDirectiveSEHPrologEnd(SMLoc L); + bool parseDirectiveSEHSaveR19R20X(SMLoc L); bool parseDirectiveSEHSaveFPLR(SMLoc L); bool parseDirectiveSEHSaveFPLRX(SMLoc L); bool parseDirectiveSEHSaveReg(SMLoc L); bool parseDirectiveSEHSaveRegX(SMLoc L); bool parseDirectiveSEHSaveRegP(SMLoc L); bool parseDirectiveSEHSaveRegPX(SMLoc L); + bool parseDirectiveSEHSaveLRPair(SMLoc L); bool parseDirectiveSEHSaveFReg(SMLoc L); bool parseDirectiveSEHSaveFRegX(SMLoc L); bool parseDirectiveSEHSaveFRegP(SMLoc L); @@ -199,8 +201,13 @@ class AArch64AsmParser : public MCTargetAsmParser { bool parseDirectiveSEHSetFP(SMLoc L); bool parseDirectiveSEHAddFP(SMLoc L); bool parseDirectiveSEHNop(SMLoc L); + bool parseDirectiveSEHSaveNext(SMLoc L); bool parseDirectiveSEHEpilogStart(SMLoc L); bool parseDirectiveSEHEpilogEnd(SMLoc L); + bool parseDirectiveSEHTrapFrame(SMLoc L); + bool parseDirectiveSEHMachineFrame(SMLoc L); + bool parseDirectiveSEHContext(SMLoc L); + bool parseDirectiveSEHClearUnwoundToCall(SMLoc L); bool validateInstruction(MCInst &Inst, SMLoc &IDLoc, SmallVectorImpl &Loc); @@ -5174,6 +5181,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveSEHAllocStack(Loc); else if (IDVal == ".seh_endprologue") parseDirectiveSEHPrologEnd(Loc); + else if (IDVal == ".seh_save_r19r20_x") + parseDirectiveSEHSaveR19R20X(Loc); else if (IDVal == ".seh_save_fplr") parseDirectiveSEHSaveFPLR(Loc); else if (IDVal == ".seh_save_fplr_x") @@ -5186,6 +5195,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveSEHSaveRegP(Loc); else if (IDVal == ".seh_save_regp_x") parseDirectiveSEHSaveRegPX(Loc); + else if (IDVal == 
".seh_save_lrpair") + parseDirectiveSEHSaveLRPair(Loc); else if (IDVal == ".seh_save_freg") parseDirectiveSEHSaveFReg(Loc); else if (IDVal == ".seh_save_freg_x") @@ -5200,10 +5211,20 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveSEHAddFP(Loc); else if (IDVal == ".seh_nop") parseDirectiveSEHNop(Loc); + else if (IDVal == ".seh_save_next") + parseDirectiveSEHSaveNext(Loc); else if (IDVal == ".seh_startepilogue") parseDirectiveSEHEpilogStart(Loc); else if (IDVal == ".seh_endepilogue") parseDirectiveSEHEpilogEnd(Loc); + else if (IDVal == ".seh_trap_frame") + parseDirectiveSEHTrapFrame(Loc); + else if (IDVal == ".seh_pushframe") + parseDirectiveSEHMachineFrame(Loc); + else if (IDVal == ".seh_context") + parseDirectiveSEHContext(Loc); + else if (IDVal == ".seh_clear_unwound_to_call") + parseDirectiveSEHClearUnwoundToCall(Loc); else return true; } else @@ -5645,6 +5666,16 @@ bool AArch64AsmParser::parseDirectiveSEHPrologEnd(SMLoc L) { return false; } +/// parseDirectiveSEHSaveR19R20X +/// ::= .seh_save_r19r20_x +bool AArch64AsmParser::parseDirectiveSEHSaveR19R20X(SMLoc L) { + int64_t Offset; + if (parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveR19R20X(Offset); + return false; +} + /// parseDirectiveSEHSaveFPLR /// ::= .seh_save_fplr bool AArch64AsmParser::parseDirectiveSEHSaveFPLR(SMLoc L) { @@ -5713,6 +5744,22 @@ bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) { return false; } +/// parseDirectiveSEHSaveLRPair +/// ::= .seh_save_lrpair +bool AArch64AsmParser::parseDirectiveSEHSaveLRPair(SMLoc L) { + unsigned Reg; + int64_t Offset; + L = getLoc(); + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) || + parseComma() || parseImmExpr(Offset)) + return true; + if (check(((Reg - 19) % 2 != 0), L, + "expected register with even offset from x19")) + return true; + getTargetStreamer().EmitARM64WinCFISaveLRPair(Reg, Offset); + return false; +} + /// parseDirectiveSEHSaveFReg /// ::= 
.seh_save_freg bool AArch64AsmParser::parseDirectiveSEHSaveFReg(SMLoc L) { @@ -5785,6 +5832,13 @@ bool AArch64AsmParser::parseDirectiveSEHNop(SMLoc L) { return false; } +/// parseDirectiveSEHSaveNext +/// ::= .seh_save_next +bool AArch64AsmParser::parseDirectiveSEHSaveNext(SMLoc L) { + getTargetStreamer().EmitARM64WinCFISaveNext(); + return false; +} + /// parseDirectiveSEHEpilogStart /// ::= .seh_startepilogue bool AArch64AsmParser::parseDirectiveSEHEpilogStart(SMLoc L) { @@ -5799,6 +5853,34 @@ bool AArch64AsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) { return false; } +/// parseDirectiveSEHTrapFrame +/// ::= .seh_trap_frame +bool AArch64AsmParser::parseDirectiveSEHTrapFrame(SMLoc L) { + getTargetStreamer().EmitARM64WinCFITrapFrame(); + return false; +} + +/// parseDirectiveSEHMachineFrame +/// ::= .seh_pushframe +bool AArch64AsmParser::parseDirectiveSEHMachineFrame(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIMachineFrame(); + return false; +} + +/// parseDirectiveSEHContext +/// ::= .seh_context +bool AArch64AsmParser::parseDirectiveSEHContext(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIContext(); + return false; +} + +/// parseDirectiveSEHClearUnwoundToCall +/// ::= .seh_clear_unwound_to_call +bool AArch64AsmParser::parseDirectiveSEHClearUnwoundToCall(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIClearUnwoundToCall(); + return false; +} + bool AArch64AsmParser::classifySymbolRef(const MCExpr *Expr, AArch64MCExpr::VariantKind &ELFRefKind, diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index ce40e96814670..3e0e9ba9f5f7f 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -50,6 +50,9 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer { void EmitARM64WinCFIAllocStack(unsigned Size) override { OS << "\t.seh_stackalloc " << Size << "\n"; } + void 
EmitARM64WinCFISaveR19R20X(int Offset) override { + OS << "\t.seh_save_r19r20_x " << Offset << "\n"; + } void EmitARM64WinCFISaveFPLR(int Offset) override { OS << "\t.seh_save_fplr " << Offset << "\n"; } @@ -68,6 +71,9 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer { void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override { OS << "\t.seh_save_regp_x x" << Reg << ", " << Offset << "\n"; } + void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override { + OS << "\t.seh_save_lrpair x" << Reg << ", " << Offset << "\n"; + } void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override { OS << "\t.seh_save_freg d" << Reg << ", " << Offset << "\n"; } @@ -85,9 +91,16 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer { OS << "\t.seh_add_fp " << Size << "\n"; } void EmitARM64WinCFINop() override { OS << "\t.seh_nop\n"; } + void EmitARM64WinCFISaveNext() override { OS << "\t.seh_save_next\n"; } void EmitARM64WinCFIPrologEnd() override { OS << "\t.seh_endprologue\n"; } void EmitARM64WinCFIEpilogStart() override { OS << "\t.seh_startepilogue\n"; } void EmitARM64WinCFIEpilogEnd() override { OS << "\t.seh_endepilogue\n"; } + void EmitARM64WinCFITrapFrame() override { OS << "\t.seh_trap_frame\n"; } + void EmitARM64WinCFIMachineFrame() override { OS << "\t.seh_pushframe\n"; } + void EmitARM64WinCFIContext() override { OS << "\t.seh_context\n"; } + void EmitARM64WinCFIClearUnwoundToCall() override { + OS << "\t.seh_clear_unwound_to_call\n"; + } public: AArch64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h index 3a0c5d8318dd5..c0dee085caced 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h @@ -37,12 +37,14 @@ class AArch64TargetStreamer : public MCTargetStreamer { virtual void emitInst(uint32_t 
Inst); virtual void EmitARM64WinCFIAllocStack(unsigned Size) {} + virtual void EmitARM64WinCFISaveR19R20X(int Offset) {} virtual void EmitARM64WinCFISaveFPLR(int Offset) {} virtual void EmitARM64WinCFISaveFPLRX(int Offset) {} virtual void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) {} + virtual void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) {} @@ -50,9 +52,14 @@ class AArch64TargetStreamer : public MCTargetStreamer { virtual void EmitARM64WinCFISetFP() {} virtual void EmitARM64WinCFIAddFP(unsigned Size) {} virtual void EmitARM64WinCFINop() {} + virtual void EmitARM64WinCFISaveNext() {} virtual void EmitARM64WinCFIPrologEnd() {} virtual void EmitARM64WinCFIEpilogStart() {} virtual void EmitARM64WinCFIEpilogEnd() {} + virtual void EmitARM64WinCFITrapFrame() {} + virtual void EmitARM64WinCFIMachineFrame() {} + virtual void EmitARM64WinCFIContext() {} + virtual void EmitARM64WinCFIClearUnwoundToCall() {} private: std::unique_ptr ConstantPools; @@ -82,12 +89,14 @@ class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer { // The unwind codes on ARM64 Windows are documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling void EmitARM64WinCFIAllocStack(unsigned Size) override; + void EmitARM64WinCFISaveR19R20X(int Offset) override; void EmitARM64WinCFISaveFPLR(int Offset) override; void EmitARM64WinCFISaveFPLRX(int Offset) override; void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override; 
void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override; + void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override; @@ -95,9 +104,15 @@ class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer { void EmitARM64WinCFISetFP() override; void EmitARM64WinCFIAddFP(unsigned Size) override; void EmitARM64WinCFINop() override; + void EmitARM64WinCFISaveNext() override; void EmitARM64WinCFIPrologEnd() override; void EmitARM64WinCFIEpilogStart() override; void EmitARM64WinCFIEpilogEnd() override; + void EmitARM64WinCFITrapFrame() override; + void EmitARM64WinCFIMachineFrame() override; + void EmitARM64WinCFIContext() override; + void EmitARM64WinCFIClearUnwoundToCall() override; + private: void EmitARM64WinUnwindCode(unsigned UnwindCode, int Reg, int Offset); }; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp index 03fbab5142a2e..a07416420fe9e 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -85,6 +85,10 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAllocStack(unsigned Size) { EmitARM64WinUnwindCode(Op, -1, Size); } +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveR19R20X(int Offset) { + EmitARM64WinUnwindCode(Win64EH::UOP_SaveR19R20X, -1, Offset); +} + void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLR(int Offset) { EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLR, -1, Offset); } @@ -115,6 +119,11 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegPX(unsigned Reg, EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegPX, Reg, Offset); } +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveLRPair(unsigned Reg, + int Offset) { + 
EmitARM64WinUnwindCode(Win64EH::UOP_SaveLRPair, Reg, Offset); +} + void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) { assert(Offset >= 0 && Offset <= 504 && @@ -150,6 +159,10 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFINop() { EmitARM64WinUnwindCode(Win64EH::UOP_Nop, -1, 0); } +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveNext() { + EmitARM64WinUnwindCode(Win64EH::UOP_SaveNext, -1, 0); +} + // The functions below handle opcodes that can end up in either a prolog or // an epilog, but not both. void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIPrologEnd() { @@ -188,6 +201,22 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogEnd() { CurrentEpilog = nullptr; } +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFITrapFrame() { + EmitARM64WinUnwindCode(Win64EH::UOP_TrapFrame, -1, 0); +} + +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIMachineFrame() { + EmitARM64WinUnwindCode(Win64EH::UOP_PushMachFrame, -1, 0); +} + +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIContext() { + EmitARM64WinUnwindCode(Win64EH::UOP_Context, -1, 0); +} + +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIClearUnwoundToCall() { + EmitARM64WinUnwindCode(Win64EH::UOP_ClearUnwoundToCall, -1, 0); +} + MCWinCOFFStreamer *createAArch64WinCOFFStreamer( MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, std::unique_ptr Emitter, diff --git a/llvm/test/MC/AArch64/seh.s b/llvm/test/MC/AArch64/seh.s index 633eeb50d8dd7..f7faa64b9309a 100644 --- a/llvm/test/MC/AArch64/seh.s +++ b/llvm/test/MC/AArch64/seh.s @@ -20,7 +20,7 @@ // CHECK-NEXT: } // CHECK: Section { // CHECK: Name: .xdata -// CHECK: RawDataSize: 48 +// CHECK: RawDataSize: 56 // CHECK: RelocationCount: 1 // CHECK: Characteristics [ // CHECK-NEXT: ALIGN_4BYTES @@ -41,7 +41,7 @@ // CHECK-NEXT: Relocations [ // CHECK-NEXT: Section (4) .xdata { -// CHECK-NEXT: 0x24 IMAGE_REL_ARM64_ADDR32NB __C_specific_handler +// CHECK-NEXT: 0x2C IMAGE_REL_ARM64_ADDR32NB 
__C_specific_handler // CHECK-NEXT: } // CHECK-NEXT: Section (5) .pdata { // CHECK-NEXT: 0x0 IMAGE_REL_ARM64_ADDR32NB func @@ -54,8 +54,12 @@ // CHECK-NEXT: Function: func // CHECK-NEXT: ExceptionRecord: .xdata // CHECK-NEXT: ExceptionData { -// CHECK-NEXT: FunctionLength: 72 +// CHECK-NEXT: FunctionLength: 100 // CHECK: Prologue [ +// CHECK-NEXT: 0xec ; clear unwound to call +// CHECK-NEXT: 0xea ; context +// CHECK-NEXT: 0xe9 ; machine frame +// CHECK-NEXT: 0xe8 ; trap frame // CHECK-NEXT: 0xe3 ; nop // CHECK-NEXT: 0xe202 ; add fp, sp, #16 // CHECK-NEXT: 0xdd41 ; str d13, [sp, #8] @@ -66,7 +70,10 @@ // CHECK-NEXT: 0x46 ; stp x29, x30, [sp, #48] // CHECK-NEXT: 0xd141 ; str x24, [sp, #8] // CHECK-NEXT: 0xd483 ; str x23, [sp, #-32]! +// CHECK-NEXT: 0xe6 ; save next // CHECK-NEXT: 0xc882 ; stp x21, x22, [sp, #16] +// CHECK-NEXT: 0xd6c2 ; stp x25, lr, [sp, #16] +// CHECK-NEXT: 0x24 ; stp x19, x20, [sp, #-32]! // CHECK-NEXT: 0xcc03 ; stp x19, x20, [sp, #-32]! // CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]! // CHECK-NEXT: 0xe1 ; mov fp, sp @@ -75,8 +82,8 @@ // CHECK-NEXT: ] // CHECK-NEXT: EpilogueScopes [ // CHECK-NEXT: EpilogueScope { -// CHECK-NEXT: StartOffset: 16 -// CHECK-NEXT: EpilogueStartIndex: 25 +// CHECK-NEXT: StartOffset: 23 +// CHECK-NEXT: EpilogueStartIndex: 33 // CHECK-NEXT: Opcodes [ // CHECK-NEXT: 0x01 ; add sp, #16 // CHECK-NEXT: 0xe4 ; end @@ -108,8 +115,14 @@ func: .seh_save_fplr_x 32 stp x19, x20, [sp, #-32]! .seh_save_regp_x x19, 32 + stp x19, x20, [sp, #-32]! + .seh_save_r19r20_x 32 + stp x25, x30, [sp, #16] + .seh_save_lrpair x25, 16 stp x21, x22, [sp, #16] .seh_save_regp x21, 16 + stp x23, x24, [sp, #32] + .seh_save_next str x23, [sp, #-32]! 
.seh_save_reg_x x23, 32 str x24, [sp, #8] @@ -130,6 +143,14 @@ func: .seh_add_fp 16 nop .seh_nop + nop + .seh_trap_frame + nop + .seh_pushframe + nop + .seh_context + nop + .seh_clear_unwound_to_call .seh_endprologue nop .seh_startepilogue diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp index c1db03a61c9fa..d753185177050 100644 --- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp +++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp @@ -167,6 +167,11 @@ const Decoder::RingEntry Decoder::Ring64[] = { { 0xff, 0xe3, 1, &Decoder::opcode_nop }, { 0xff, 0xe4, 1, &Decoder::opcode_end }, { 0xff, 0xe5, 1, &Decoder::opcode_end_c }, + { 0xff, 0xe6, 1, &Decoder::opcode_save_next }, + { 0xff, 0xe8, 1, &Decoder::opcode_trap_frame }, + { 0xff, 0xe9, 1, &Decoder::opcode_machine_frame }, + { 0xff, 0xea, 1, &Decoder::opcode_context }, + { 0xff, 0xec, 1, &Decoder::opcode_clear_unwound_to_call }, }; void Decoder::printRegisters(const std::pair &RegisterMask) { @@ -776,6 +781,47 @@ bool Decoder::opcode_end_c(const uint8_t *OC, unsigned &Offset, unsigned Length, return true; } +bool Decoder::opcode_save_next(const uint8_t *OC, unsigned &Offset, + unsigned Length, bool Prologue) { + if (Prologue) + SW.startLine() << format("0x%02x ; save next\n", OC[Offset]); + else + SW.startLine() << format("0x%02x ; restore next\n", + OC[Offset]); + ++Offset; + return false; +} + +bool Decoder::opcode_trap_frame(const uint8_t *OC, unsigned &Offset, + unsigned Length, bool Prologue) { + SW.startLine() << format("0x%02x ; trap frame\n", OC[Offset]); + ++Offset; + return false; +} + +bool Decoder::opcode_machine_frame(const uint8_t *OC, unsigned &Offset, + unsigned Length, bool Prologue) { + SW.startLine() << format("0x%02x ; machine frame\n", + OC[Offset]); + ++Offset; + return false; +} + +bool Decoder::opcode_context(const uint8_t *OC, unsigned &Offset, + unsigned Length, bool Prologue) { + SW.startLine() << format("0x%02x ; context\n", OC[Offset]); + 
++Offset; + return false; +} + +bool Decoder::opcode_clear_unwound_to_call(const uint8_t *OC, unsigned &Offset, + unsigned Length, bool Prologue) { + SW.startLine() << format("0x%02x ; clear unwound to call\n", + OC[Offset]); + ++Offset; + return false; +} + void Decoder::decodeOpcodes(ArrayRef Opcodes, unsigned Offset, bool Prologue) { assert((!Prologue || Offset == 0) && "prologue should always use offset 0"); diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h index 5de7062cb1d7b..36fe5d6f4b2b4 100644 --- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h +++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h @@ -120,6 +120,14 @@ class Decoder { bool Prologue); bool opcode_save_next(const uint8_t *Opcodes, unsigned &Offset, unsigned Length, bool Prologue); + bool opcode_trap_frame(const uint8_t *Opcodes, unsigned &Offset, + unsigned Length, bool Prologue); + bool opcode_machine_frame(const uint8_t *Opcodes, unsigned &Offset, + unsigned Length, bool Prologue); + bool opcode_context(const uint8_t *Opcodes, unsigned &Offset, unsigned Length, + bool Prologue); + bool opcode_clear_unwound_to_call(const uint8_t *Opcodes, unsigned &Offset, + unsigned Length, bool Prologue); void decodeOpcodes(ArrayRef Opcodes, unsigned Offset, bool Prologue); From 13cde6733b5fec9eff5e9652686c3b02055e6169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 3 Sep 2020 11:19:40 +0300 Subject: [PATCH 084/465] [lldb] Remove a stray semicolon, fixing pedantic GCC warnings. NFC. 
--- lldb/source/API/SBReproducer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/API/SBReproducer.cpp b/lldb/source/API/SBReproducer.cpp index 233e55550b5b7..ec1c85d243294 100644 --- a/lldb/source/API/SBReproducer.cpp +++ b/lldb/source/API/SBReproducer.cpp @@ -31,7 +31,7 @@ using namespace lldb_private; using namespace lldb_private::repro; SBReplayOptions::SBReplayOptions() - : m_opaque_up(std::make_unique()){}; + : m_opaque_up(std::make_unique()){} SBReplayOptions::SBReplayOptions(const SBReplayOptions &rhs) : m_opaque_up(std::make_unique(*rhs.m_opaque_up)) {} From 5b354d204d0952a6dd39e41fb41b51414bff5f0b Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 3 Sep 2020 10:26:33 +0200 Subject: [PATCH 085/465] [lldb] Make symbol list output from `image dump symtab` not depend on internal ordering of DenseMap `image dump symtab` seems to output the symbols in whatever order they appear in the DenseMap that is used to filter out symbols with non-unique addresses. As DenseMap is a hash map this order can change at any time so the output of this command is pretty unstable. This also causes the `Breakpad/symtab.test` to fail with enabled reverse iteration (which reverses the DenseMap order to find issues like this). This patch makes the DenseMap a std::vector and uses a separate DenseSet to do the address filtering. The output order is now dependent on the order in which the symbols are read (which should be deterministic). It might also avoid a bit of work as all the work for creating the Symbol constructor parameters is only done when we can actually emplace a new Symbol. 
Reviewed By: labath Differential Revision: https://reviews.llvm.org/D87036 --- .../SymbolFile/Breakpad/SymbolFileBreakpad.cpp | 15 ++++++++++----- lldb/test/Shell/SymbolFile/Breakpad/symtab.test | 8 ++++---- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index eeec7296747e2..07e5b284eab81 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -326,7 +326,8 @@ void SymbolFileBreakpad::AddSymbols(Symtab &symtab) { } const SectionList &list = *module.GetSectionList(); - llvm::DenseMap symbols; + llvm::DenseSet found_symbol_addresses; + std::vector symbols; auto add_symbol = [&](addr_t address, llvm::Optional size, llvm::StringRef name) { address += base; @@ -338,8 +339,12 @@ void SymbolFileBreakpad::AddSymbols(Symtab &symtab) { name, address); return; } - symbols.try_emplace( - address, /*symID*/ 0, Mangled(name), eSymbolTypeCode, + // Keep track of what addresses were already added so far and only add + // the symbol with the first address. + if (!found_symbol_addresses.insert(address).second) + return; + symbols.emplace_back( + /*symID*/ 0, Mangled(name), eSymbolTypeCode, /*is_global*/ true, /*is_debug*/ false, /*is_trampoline*/ false, /*is_artificial*/ false, AddressRange(section_sp, address - section_sp->GetFileAddress(), @@ -359,8 +364,8 @@ void SymbolFileBreakpad::AddSymbols(Symtab &symtab) { LLDB_LOG(log, "Failed to parse: {0}. 
Skipping record.", line); } - for (auto &KV : symbols) - symtab.AddSymbol(std::move(KV.second)); + for (Symbol &symbol : symbols) + symtab.AddSymbol(std::move(symbol)); symtab.CalculateSymbolSizes(); } diff --git a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test index a02d94c30aa3a..1eb03fa43deb0 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test @@ -6,10 +6,10 @@ # CHECK: Symtab, file = {{.*}}symtab.out, num_symbols = 5: # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name # CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}}$$symtab.out -# CHECK: [ 1] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2 -# CHECK: [ 2] 0 X Code 0x00000000004000d0 0x0000000000000022 0x00000000 _start -# CHECK: [ 3] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only -# CHECK: [ 4] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func +# CHECK: [ 1] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func +# CHECK: [ 2] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only +# CHECK: [ 3] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2 +# CHECK: [ 4] 0 X Code 0x00000000004000d0 0x0000000000000022 0x00000000 _start # CHECK-LABEL: (lldb) image lookup -a 0x4000b0 -v # CHECK: Address: symtab.out[0x00000000004000b0] (symtab.out.PT_LOAD[0]..text2 + 0) From bf8f6e89c8d6fbac3e91ab37de7431d8e7c8aab4 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Thu, 3 Sep 2020 10:05:51 +0200 Subject: [PATCH 086/465] [lldb/Interpreter] Fix language detection for the REPL InitFile Previously, before loading the REPL language-specific init file, lldb checked the selected target language in which case it returned an unknown language type with the REPL target. 
Instead, the patch calls `Language::GetLanguagesSupportingREPLs` and looks for the first element of that set. If lldb was not configured with a REPL language, it will just stop sourcing the REPL init file and fall back to the original logic (continuing with the default init file). rdar://65836048 Differential Revision: https://reviews.llvm.org/D87076 Signed-off-by: Med Ismail Bennani --- lldb/source/Interpreter/CommandInterpreter.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 8c77227d01f2a..1f67468000976 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2091,9 +2091,12 @@ static void GetHomeInitFile(llvm::SmallVectorImpl &init_file, FileSystem::Instance().Resolve(init_file); } -static void GetHomeREPLInitFile(llvm::SmallVectorImpl &init_file, - LanguageType language) { - if (language == LanguageType::eLanguageTypeUnknown) +static void GetHomeREPLInitFile(llvm::SmallVectorImpl &init_file) { + LanguageSet repl_languages = Language::GetLanguagesSupportingREPLs(); + LanguageType language = eLanguageTypeUnknown; + if (auto main_repl_language = repl_languages.GetSingularLanguage()) + language = *main_repl_language; + else return; std::string init_file_name = @@ -2191,13 +2194,8 @@ void CommandInterpreter::SourceInitFileHome(CommandReturnObject &result, llvm::SmallString<128> init_file; - if (is_repl) { - LanguageType language = {}; - TargetSP target_sp = GetDebugger().GetSelectedTarget(); - if (target_sp) - language = target_sp->GetLanguage(); - GetHomeREPLInitFile(init_file, language); - } + if (is_repl) + GetHomeREPLInitFile(init_file); if (init_file.empty()) GetHomeInitFile(init_file); From ce654f4be085f0039e7c0d2f1e37db25931e56e2 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 7 Aug 2020 13:21:53 +0100 Subject: [PATCH 087/465] [SCCP] Add test 
where dereferenceable ptr is replaced with un-dereferenceable one --- ...eferenceable-ptr-with-undereferenceable.ll | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll diff --git a/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll b/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll new file mode 100644 index 0000000000000..5857ce2d30b72 --- /dev/null +++ b/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -ipsccp -S %s | FileCheck %s + +@y = common global [1 x i32] zeroinitializer, align 4 +@x = common global [1 x i32] zeroinitializer, align 4 + +define i32 @eq_undereferenceable(i32* %p) { +; CHECK-LABEL: @eq_undereferenceable( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 2, i32* getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1), align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + store i32 1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 + %cmp = icmp eq i32* %p, getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 2, i32* %p, 
align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 + ret i32 %0 +} + + +define i32 @eq_dereferenceable(i32* %p) { +; CHECK-LABEL: @eq_dereferenceable( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0) +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 2, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + store i32 1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 + %cmp = icmp eq i32* %p, getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 0) + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 2, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 + ret i32 %0 +} + +define i1 @eq_undereferenceable_cmp_simp(i32* %p) { +; CHECK-LABEL: @eq_undereferenceable_cmp_simp( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP_0:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) +; CHECK-NEXT: br i1 [[CMP_0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 2, i32* getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1), align 4 +; CHECK-NEXT: ret i1 true +; CHECK: if.end: +; 
CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i32* [[P]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) +; CHECK-NEXT: ret i1 [[CMP_2]] +; +entry: + %cmp.0 = icmp eq i32* %p, getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) + br i1 %cmp.0, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 2, i32* %p, align 4 + %cmp.1 = icmp eq i32* %p, getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) + ret i1 %cmp.1 + +if.end: ; preds = %if.then, %entry + %cmp.2 = icmp eq i32* %p, getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) + ret i1 %cmp.2 +} From 3a204318c6cd6a6b54081e58ced012e2ec120cc9 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 2 Sep 2020 18:06:45 +0300 Subject: [PATCH 088/465] [llvm-readelf/obj] - Cleanup the interface of `DumpStyle`. NFCI. We have 2 DumpStyles currently: `class GNUStyle : public DumpStyle` and `class LLVMStyle : public DumpStyle`. The problem of the `DumpStyle` interface is that for almost every method we provide `const ELFFile *` as argument. But in fact each of the dump styles keeps `ELFDumper *Dumper`, which can be used to get an object from. But since we use the `Obj` too often, I've decided to introduce one more reference member instead of reading it from the `Dumper` each time: `const ELFFile &Obj;` This is kind of similar to the `FileName` member which we have already: it is also used to store the file name which can be read from `Dumper->getElfObject()->getFileName()`. I had to adjust the code which previously worked with a pointer to an object and now works with a reference. In a follow-up I am going to try to get rid of `const ELFObjectFile` arguments which are still passed to a set of functions.
Differential revision: https://reviews.llvm.org/D87040 --- llvm/tools/llvm-readobj/ELFDumper.cpp | 894 ++++++++++++-------------- 1 file changed, 413 insertions(+), 481 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index fa821ff6a619b..dfcbb00bf2340 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -372,10 +372,10 @@ template class ELFDumper : public ObjDumper { }; template -static std::string describe(const ELFFile *Obj, +static std::string describe(const ELFFile &Obj, const typename ELFT::Shdr &Sec) { - unsigned SecNdx = &Sec - &cantFail(Obj->sections()).front(); - return (object::getELFSectionTypeName(Obj->getHeader()->e_machine, + unsigned SecNdx = &Sec - &cantFail(Obj.sections()).front(); + return (object::getELFSectionTypeName(Obj.getHeader()->e_machine, Sec.sh_type) + " section with index " + Twine(SecNdx)) .str(); @@ -383,19 +383,19 @@ static std::string describe(const ELFFile *Obj, template std::string ELFDumper::describe(const Elf_Shdr &Sec) const { - return ::describe(ObjF->getELFFile(), Sec); + return ::describe(*ObjF->getELFFile(), Sec); } template -static Expected getLinkAsStrtab(const ELFFile *Obj, +static Expected getLinkAsStrtab(const ELFFile &Obj, const typename ELFT::Shdr *Sec) { Expected StrTabSecOrErr = - Obj->getSection(Sec->sh_link); + Obj.getSection(Sec->sh_link); if (!StrTabSecOrErr) return createError("invalid section linked to " + describe(Obj, *Sec) + ": " + toString(StrTabSecOrErr.takeError())); - Expected StrTabOrErr = Obj->getStringTable(*StrTabSecOrErr); + Expected StrTabOrErr = Obj.getStringTable(*StrTabSecOrErr); if (!StrTabOrErr) return createError("invalid string table linked to " + describe(Obj, *Sec) + ": " + toString(StrTabOrErr.takeError())); @@ -405,22 +405,22 @@ static Expected getLinkAsStrtab(const ELFFile *Obj, // Returns the linked symbol table and associated string table for a given section. 
template static Expected> -getLinkAsSymtab(const ELFFile *Obj, const typename ELFT::Shdr *Sec, +getLinkAsSymtab(const ELFFile &Obj, const typename ELFT::Shdr *Sec, unsigned ExpectedType) { Expected SymtabOrErr = - Obj->getSection(Sec->sh_link); + Obj.getSection(Sec->sh_link); if (!SymtabOrErr) return createError("invalid section linked to " + describe(Obj, *Sec) + ": " + toString(SymtabOrErr.takeError())); if ((*SymtabOrErr)->sh_type != ExpectedType) - return createError( - "invalid section linked to " + describe(Obj, *Sec) + ": expected " + - object::getELFSectionTypeName(Obj->getHeader()->e_machine, - ExpectedType) + - ", but got " + - object::getELFSectionTypeName(Obj->getHeader()->e_machine, - (*SymtabOrErr)->sh_type)); + return createError("invalid section linked to " + describe(Obj, *Sec) + + ": expected " + + object::getELFSectionTypeName(Obj.getHeader()->e_machine, + ExpectedType) + + ", but got " + + object::getELFSectionTypeName(Obj.getHeader()->e_machine, + (*SymtabOrErr)->sh_type)); Expected StrTabOrErr = getLinkAsStrtab(Obj, *SymtabOrErr); if (!StrTabOrErr) @@ -428,7 +428,7 @@ getLinkAsSymtab(const ELFFile *Obj, const typename ELFT::Shdr *Sec, "can't get a string table for the symbol table linked to " + describe(Obj, *Sec) + ": " + toString(StrTabOrErr.takeError())); - Expected SymsOrErr = Obj->symbols(*SymtabOrErr); + Expected SymsOrErr = Obj.symbols(*SymtabOrErr); if (!SymsOrErr) return createError("unable to read symbols from the " + describe(Obj, *Sec) + ": " + @@ -454,7 +454,7 @@ ELFDumper::getVersionTable(const Elf_Shdr *Sec, ArrayRef *SymTab, toString(VersionsOrErr.takeError())); Expected, StringRef>> SymTabOrErr = - getLinkAsSymtab(Obj, Sec, SHT_DYNSYM); + getLinkAsSymtab(*Obj, Sec, SHT_DYNSYM); if (!SymTabOrErr) { reportUniqueWarning(SymTabOrErr.takeError()); return *VersionsOrErr; @@ -478,7 +478,7 @@ Expected> ELFDumper::getVersionDefinitions(const Elf_Shdr *Sec) const { const ELFFile *Obj = ObjF->getELFFile(); - Expected StrTabOrErr = 
getLinkAsStrtab(Obj, Sec); + Expected StrTabOrErr = getLinkAsStrtab(*Obj, Sec); if (!StrTabOrErr) return StrTabOrErr.takeError(); @@ -565,7 +565,7 @@ Expected> ELFDumper::getVersionDependencies(const Elf_Shdr *Sec) const { const ELFFile *Obj = ObjF->getELFFile(); StringRef StrTab; - Expected StrTabOrErr = getLinkAsStrtab(Obj, Sec); + Expected StrTabOrErr = getLinkAsStrtab(*Obj, Sec); if (!StrTabOrErr) reportUniqueWarning(StrTabOrErr.takeError()); else @@ -681,10 +681,9 @@ void ELFDumper::printSymbolsHelper(bool IsDynamic) const { return S.st_other & ~0x3; }) != Syms.end(); - ELFDumperStyle->printSymtabMessage(Obj, SymtabSec, Entries, - NonVisibilityBitsUsed); + ELFDumperStyle->printSymtabMessage(SymtabSec, Entries, NonVisibilityBitsUsed); for (const auto &Sym : Syms) - ELFDumperStyle->printSymbol(Obj, &Sym, Syms.begin(), StrTable, IsDynamic, + ELFDumperStyle->printSymbol(&Sym, Syms.begin(), StrTable, IsDynamic, NonVisibilityBitsUsed); } @@ -694,43 +693,37 @@ template class DumpStyle { public: TYPEDEF_ELF_TYPES(ELFT) - DumpStyle(ELFDumper *Dumper) : Dumper(Dumper) { + DumpStyle(ELFDumper *Dumper) + : Obj(*Dumper->getElfObject()->getELFFile()), Dumper(Dumper) { FileName = this->Dumper->getElfObject()->getFileName(); } virtual ~DumpStyle() = default; - virtual void printFileHeaders(const ELFFile *Obj) = 0; - virtual void printGroupSections(const ELFFile *Obj) = 0; - virtual void printRelocations(const ELFFile *Obj) = 0; - virtual void printSectionHeaders(const ELFFile *Obj) = 0; - virtual void printSymbols(const ELFFile *Obj, bool PrintSymbols, - bool PrintDynamicSymbols) = 0; - virtual void printHashSymbols(const ELFFile *Obj) {} - virtual void printDependentLibs(const ELFFile *Obj) = 0; - virtual void printDynamic(const ELFFile *Obj) {} - virtual void printDynamicRelocations(const ELFFile *Obj) = 0; - virtual void printSymtabMessage(const ELFFile *Obj, - const Elf_Shdr *Symtab, size_t Offset, + virtual void printFileHeaders() = 0; + virtual void printGroupSections() 
= 0; + virtual void printRelocations() = 0; + virtual void printSectionHeaders() = 0; + virtual void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) = 0; + virtual void printHashSymbols() {} + virtual void printDependentLibs() = 0; + virtual void printDynamic() {} + virtual void printDynamicRelocations() = 0; + virtual void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset, bool NonVisibilityBitsUsed) {} - virtual void printSymbol(const ELFFile *Obj, const Elf_Sym *Symbol, - const Elf_Sym *FirstSym, + virtual void printSymbol(const Elf_Sym *Symbol, const Elf_Sym *FirstSym, Optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed) = 0; - virtual void printProgramHeaders(const ELFFile *Obj, - bool PrintProgramHeaders, + virtual void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) = 0; - virtual void printVersionSymbolSection(const ELFFile *Obj, - const Elf_Shdr *Sec) = 0; - virtual void printVersionDefinitionSection(const ELFFile *Obj, - const Elf_Shdr *Sec) = 0; - virtual void printVersionDependencySection(const ELFFile *Obj, - const Elf_Shdr *Sec) = 0; - virtual void printHashHistograms(const ELFFile *Obj) = 0; - virtual void printCGProfile(const ELFFile *Obj) = 0; - virtual void printAddrsig(const ELFFile *Obj) = 0; - virtual void printNotes(const ELFFile *Obj) = 0; - virtual void printELFLinkerOptions(const ELFFile *Obj) = 0; + virtual void printVersionSymbolSection(const Elf_Shdr *Sec) = 0; + virtual void printVersionDefinitionSection(const Elf_Shdr *Sec) = 0; + virtual void printVersionDependencySection(const Elf_Shdr *Sec) = 0; + virtual void printHashHistograms() = 0; + virtual void printCGProfile() = 0; + virtual void printAddrsig() = 0; + virtual void printNotes() = 0; + virtual void printELFLinkerOptions() = 0; virtual void printStackSizes(const ELFObjectFile *Obj) = 0; void printNonRelocatableStackSizes(const ELFObjectFile *Obj, std::function PrintHeader); @@ -752,24 +745,22 @@ template class 
DumpStyle { protected: void printDependentLibsHelper( - const ELFFile *Obj, function_ref OnSectionStart, function_ref OnSectionEntry); - virtual void printRelReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rel &R, - unsigned RelIndex) = 0; - virtual void printRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rela &R, - unsigned RelIndex) = 0; + virtual void printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, + const Elf_Rel &R, unsigned RelIndex) = 0; + virtual void printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, + const Elf_Rela &R, unsigned RelIndex) = 0; virtual void printRelrReloc(const Elf_Relr &R) = 0; - void printRelocationsHelper(const ELFFile *Obj, const Elf_Shdr &Sec); + void printRelocationsHelper(const Elf_Shdr &Sec); - StringRef getPrintableSectionName(const ELFFile *Obj, - const Elf_Shdr &Sec) const; + StringRef getPrintableSectionName(const Elf_Shdr &Sec) const; void reportUniqueWarning(Error Err) const; + StringRef FileName; + const ELFFile &Obj; private: const ELFDumper *Dumper; @@ -787,31 +778,27 @@ template class GNUStyle : public DumpStyle { assert (&W.getOStream() == &llvm::fouts()); } - void printFileHeaders(const ELFO *Obj) override; - void printGroupSections(const ELFFile *Obj) override; - void printRelocations(const ELFO *Obj) override; - void printSectionHeaders(const ELFO *Obj) override; - void printSymbols(const ELFO *Obj, bool PrintSymbols, - bool PrintDynamicSymbols) override; - void printHashSymbols(const ELFO *Obj) override; - void printDependentLibs(const ELFFile *Obj) override; - void printDynamic(const ELFFile *Obj) override; - void printDynamicRelocations(const ELFO *Obj) override; - void printSymtabMessage(const ELFO *Obj, const Elf_Shdr *Symtab, - size_t Offset, bool NonVisibilityBitsUsed) override; - void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders, + void printFileHeaders() override; + void printGroupSections() override; + void 
printRelocations() override; + void printSectionHeaders() override; + void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override; + void printHashSymbols() override; + void printDependentLibs() override; + void printDynamic() override; + void printDynamicRelocations() override; + void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset, + bool NonVisibilityBitsUsed) override; + void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; - void printVersionSymbolSection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printVersionDefinitionSection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printVersionDependencySection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printHashHistograms(const ELFFile *Obj) override; - void printCGProfile(const ELFFile *Obj) override; - void printAddrsig(const ELFFile *Obj) override; - void printNotes(const ELFFile *Obj) override; - void printELFLinkerOptions(const ELFFile *Obj) override; + void printVersionSymbolSection(const Elf_Shdr *Sec) override; + void printVersionDefinitionSection(const Elf_Shdr *Sec) override; + void printVersionDependencySection(const Elf_Shdr *Sec) override; + void printHashHistograms() override; + void printCGProfile() override; + void printAddrsig() override; + void printNotes() override; + void printELFLinkerOptions() override; void printStackSizes(const ELFObjectFile *Obj) override; void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; void printMipsGOT(const MipsGOTParser &Parser) override; @@ -822,9 +809,8 @@ template class GNUStyle : public DumpStyle { void printHashHistogram(const Elf_Hash &HashTable); void printGnuHashHistogram(const Elf_GnuHash &GnuHashTable); - void printHashTableSymbols(const ELFO *Obj, const Elf_Hash &HashTable); - void printGnuHashTableSymbols(const ELFO *Obj, - const Elf_GnuHash &GnuHashTable); + void printHashTableSymbols(const Elf_Hash &HashTable); + void 
printGnuHashTableSymbols(const Elf_GnuHash &GnuHashTable); struct Field { std::string Str; @@ -876,35 +862,31 @@ template class GNUStyle : public DumpStyle { OS.flush(); return OS; } - void printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, uint32_t Sym, + void printHashedSymbol(const Elf_Sym *FirstSym, uint32_t Sym, StringRef StrTable, uint32_t Bucket); void printRelocHeader(unsigned SType); - void printRelReloc(const ELFO *Obj, unsigned SecIndex, const Elf_Shdr *SymTab, + void printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, const Elf_Rel &R, unsigned RelIndex) override; - void printRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rela &R, - unsigned RelIndex) override; + void printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, + const Elf_Rela &R, unsigned RelIndex) override; void printRelrReloc(const Elf_Relr &R) override; template - void printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const RelTy &R, - unsigned RelIndex); + void printRelRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, + const RelTy &R, unsigned RelIndex); template - void printRelRelaReloc(const ELFO *Obj, const Elf_Sym *Sym, - StringRef SymbolName, const RelTy &R); - void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First, + void printRelRelaReloc(const Elf_Sym *Sym, StringRef SymbolName, + const RelTy &R); + void printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, Optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed) override; - std::string getSymbolSectionNdx(const ELFO *Obj, const Elf_Sym *Symbol, + std::string getSymbolSectionNdx(const Elf_Sym *Symbol, const Elf_Sym *FirstSym); - template - void printDynamicRelocation(const ELFO *Obj, const RelTy &R); - void printProgramHeaders(const ELFO *Obj); - void printSectionMapping(const ELFO *Obj); - void printGNUVersionSectionProlog(const ELFFile *Obj, - const typename ELFT::Shdr *Sec, + template void 
printDynamicRelocation(const RelTy &R); + void printProgramHeaders(); + void printSectionMapping(); + void printGNUVersionSectionProlog(const typename ELFT::Shdr *Sec, const Twine &Label, unsigned EntriesNum); }; @@ -928,28 +910,24 @@ template class LLVMStyle : public DumpStyle { LLVMStyle(ScopedPrinter &W, ELFDumper *Dumper) : DumpStyle(Dumper), W(W) {} - void printFileHeaders(const ELFO *Obj) override; - void printGroupSections(const ELFFile *Obj) override; - void printRelocations(const ELFO *Obj) override; - void printSectionHeaders(const ELFO *Obj) override; - void printSymbols(const ELFO *Obj, bool PrintSymbols, - bool PrintDynamicSymbols) override; - void printDependentLibs(const ELFFile *Obj) override; - void printDynamic(const ELFFile *Obj) override; - void printDynamicRelocations(const ELFO *Obj) override; - void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders, + void printFileHeaders() override; + void printGroupSections() override; + void printRelocations() override; + void printSectionHeaders() override; + void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override; + void printDependentLibs() override; + void printDynamic() override; + void printDynamicRelocations() override; + void printProgramHeaders(bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; - void printVersionSymbolSection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printVersionDefinitionSection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printVersionDependencySection(const ELFFile *Obj, - const Elf_Shdr *Sec) override; - void printHashHistograms(const ELFFile *Obj) override; - void printCGProfile(const ELFFile *Obj) override; - void printAddrsig(const ELFFile *Obj) override; - void printNotes(const ELFFile *Obj) override; - void printELFLinkerOptions(const ELFFile *Obj) override; + void printVersionSymbolSection(const Elf_Shdr *Sec) override; + void printVersionDefinitionSection(const Elf_Shdr *Sec) override; 
+ void printVersionDependencySection(const Elf_Shdr *Sec) override; + void printHashHistograms() override; + void printCGProfile() override; + void printAddrsig() override; + void printNotes() override; + void printELFLinkerOptions() override; void printStackSizes(const ELFObjectFile *Obj) override; void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; void printMipsGOT(const MipsGOTParser &Parser) override; @@ -957,26 +935,24 @@ template class LLVMStyle : public DumpStyle { void printMipsABIFlags(const ELFObjectFile *Obj) override; private: - void printRelReloc(const ELFO *Obj, unsigned SecIndex, const Elf_Shdr *SymTab, + void printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, const Elf_Rel &R, unsigned RelIndex) override; - void printRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rela &R, - unsigned RelIndex) override; + void printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, + const Elf_Rela &R, unsigned RelIndex) override; void printRelrReloc(const Elf_Relr &R) override; template - void printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, const RelTy &Rel, - unsigned RelIndex, const Elf_Shdr *SymTab); - template - void printDynamicRelocation(const ELFO *Obj, const RelTy& Rel); + void printRelRelaReloc(unsigned SecIndex, const RelTy &Rel, unsigned RelIndex, + const Elf_Shdr *SymTab); + template void printDynamicRelocation(const RelTy &Rel); - void printSymbols(const ELFO *Obj); - void printDynamicSymbols(const ELFO *Obj); + void printSymbols(); + void printDynamicSymbols(); void printSymbolSection(const Elf_Sym *Symbol, const Elf_Sym *First); - void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First, + void printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, Optional StrTable, bool IsDynamic, bool /*NonVisibilityBitsUsed*/) override; - void printProgramHeaders(const ELFO *Obj); - void printSectionMapping(const ELFO *Obj) {} + void printProgramHeaders(); + void 
printSectionMapping() {} ScopedPrinter &W; }; @@ -2273,71 +2249,66 @@ typename ELFDumper::Elf_Relr_Range ELFDumper::dyn_relrs() const { } template void ELFDumper::printFileHeaders() { - ELFDumperStyle->printFileHeaders(ObjF->getELFFile()); + ELFDumperStyle->printFileHeaders(); } template void ELFDumper::printSectionHeaders() { - ELFDumperStyle->printSectionHeaders(ObjF->getELFFile()); + ELFDumperStyle->printSectionHeaders(); } template void ELFDumper::printRelocations() { - ELFDumperStyle->printRelocations(ObjF->getELFFile()); + ELFDumperStyle->printRelocations(); } template void ELFDumper::printProgramHeaders( bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { - ELFDumperStyle->printProgramHeaders(ObjF->getELFFile(), PrintProgramHeaders, - PrintSectionMapping); + ELFDumperStyle->printProgramHeaders(PrintProgramHeaders, PrintSectionMapping); } template void ELFDumper::printVersionInfo() { // Dump version symbol section. - ELFDumperStyle->printVersionSymbolSection(ObjF->getELFFile(), - SymbolVersionSection); + ELFDumperStyle->printVersionSymbolSection(SymbolVersionSection); // Dump version definition section. - ELFDumperStyle->printVersionDefinitionSection(ObjF->getELFFile(), - SymbolVersionDefSection); + ELFDumperStyle->printVersionDefinitionSection(SymbolVersionDefSection); // Dump version dependency section. 
- ELFDumperStyle->printVersionDependencySection(ObjF->getELFFile(), - SymbolVersionNeedSection); + ELFDumperStyle->printVersionDependencySection(SymbolVersionNeedSection); } template void ELFDumper::printDependentLibs() { - ELFDumperStyle->printDependentLibs(ObjF->getELFFile()); + ELFDumperStyle->printDependentLibs(); } template void ELFDumper::printDynamicRelocations() { - ELFDumperStyle->printDynamicRelocations(ObjF->getELFFile()); + ELFDumperStyle->printDynamicRelocations(); } template void ELFDumper::printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) { - ELFDumperStyle->printSymbols(ObjF->getELFFile(), PrintSymbols, - PrintDynamicSymbols); + ELFDumperStyle->printSymbols(PrintSymbols, PrintDynamicSymbols); } template void ELFDumper::printHashSymbols() { - ELFDumperStyle->printHashSymbols(ObjF->getELFFile()); + ELFDumperStyle->printHashSymbols(); } template void ELFDumper::printHashHistograms() { - ELFDumperStyle->printHashHistograms(ObjF->getELFFile()); + ELFDumperStyle->printHashHistograms(); } template void ELFDumper::printCGProfile() { - ELFDumperStyle->printCGProfile(ObjF->getELFFile()); + ELFDumperStyle->printCGProfile(); } template void ELFDumper::printNotes() { - ELFDumperStyle->printNotes(ObjF->getELFFile()); + ELFDumperStyle->printNotes(); } template void ELFDumper::printELFLinkerOptions() { - ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile()); + ELFDumperStyle->printELFLinkerOptions(); } template void ELFDumper::printStackSizes() { @@ -2673,7 +2644,7 @@ template <> void ELFDumper::printUnwindInfo() { } // end anonymous namespace template void ELFDumper::printDynamicTable() { - ELFDumperStyle->printDynamic(ObjF->getELFFile()); + ELFDumperStyle->printDynamic(); } template void ELFDumper::printNeededLibraries() { @@ -2691,26 +2662,26 @@ template void ELFDumper::printNeededLibraries() { } template -static Error checkHashTable(const ELFFile *Obj, +static Error checkHashTable(const ELFFile &Obj, const typename ELFT::Hash *H, bool *IsHeaderValid 
= nullptr) { auto MakeError = [&](uint64_t Off, const Twine &Msg = "") { return createError("the hash table at offset 0x" + Twine::utohexstr(Off) + " goes past the end of the file (0x" + - Twine::utohexstr(Obj->getBufSize()) + ")" + Msg); + Twine::utohexstr(Obj.getBufSize()) + ")" + Msg); }; // Each SHT_HASH section starts from two 32-bit fields: nbucket and nchain. const unsigned HeaderSize = 2 * sizeof(typename ELFT::Word); - const uint64_t SecOffset = (const uint8_t *)H - Obj->base(); + const uint64_t SecOffset = (const uint8_t *)H - Obj.base(); if (IsHeaderValid) - *IsHeaderValid = Obj->getBufSize() - SecOffset >= HeaderSize; + *IsHeaderValid = Obj.getBufSize() - SecOffset >= HeaderSize; - if (Obj->getBufSize() - SecOffset < HeaderSize) + if (Obj.getBufSize() - SecOffset < HeaderSize) return MakeError(SecOffset); - if (Obj->getBufSize() - SecOffset - HeaderSize < + if (Obj.getBufSize() - SecOffset - HeaderSize < ((uint64_t)H->nbucket + H->nchain) * sizeof(typename ELFT::Word)) return MakeError(SecOffset, ", nbucket = " + Twine(H->nbucket) + ", nchain = " + Twine(H->nchain)); @@ -2718,20 +2689,19 @@ static Error checkHashTable(const ELFFile *Obj, } template -static Error checkGNUHashTable(const ELFFile *Obj, +static Error checkGNUHashTable(const ELFFile &Obj, const typename ELFT::GnuHash *GnuHashTable, bool *IsHeaderValid = nullptr) { const uint8_t *TableData = reinterpret_cast(GnuHashTable); - assert(TableData >= Obj->base() && - TableData < Obj->base() + Obj->getBufSize() && + assert(TableData >= Obj.base() && TableData < Obj.base() + Obj.getBufSize() && "GnuHashTable must always point to a location inside the file"); - uint64_t TableOffset = TableData - Obj->base(); + uint64_t TableOffset = TableData - Obj.base(); if (IsHeaderValid) - *IsHeaderValid = TableOffset + /*Header size:*/ 16 < Obj->getBufSize(); + *IsHeaderValid = TableOffset + /*Header size:*/ 16 < Obj.getBufSize(); if (TableOffset + 16 + (uint64_t)GnuHashTable->nbuckets * 4 + 
(uint64_t)GnuHashTable->maskwords * sizeof(typename ELFT::Off) >= - Obj->getBufSize()) + Obj.getBufSize()) return createError("unable to dump the SHT_GNU_HASH " "section at 0x" + Twine::utohexstr(TableOffset) + @@ -2745,7 +2715,7 @@ template void ELFDumper::printHashTable() { return; bool IsHeaderValid; - Error Err = checkHashTable(ObjF->getELFFile(), HashTable, &IsHeaderValid); + Error Err = checkHashTable(*ObjF->getELFFile(), HashTable, &IsHeaderValid); if (IsHeaderValid) { W.printNumber("Num Buckets", HashTable->nbucket); W.printNumber("Num Chains", HashTable->nchain); @@ -2801,8 +2771,8 @@ void ELFDumper::printGnuHashTable(const object::ObjectFile *Obj) { return; bool IsHeaderValid; - Error Err = - checkGNUHashTable(ObjF->getELFFile(), GnuHashTable, &IsHeaderValid); + Error Err = checkGNUHashTable(*ObjF->getELFFile(), GnuHashTable, + &IsHeaderValid); if (IsHeaderValid) { W.printNumber("Num Buckets", GnuHashTable->nbuckets); W.printNumber("First Hashed Symbol Index", GnuHashTable->symndx); @@ -3093,7 +3063,7 @@ Error MipsGOTParser::findPLT(Elf_Dyn_Range DynTable) { PltSymTable = *PltSymTableOrErr; else return createError("unable to get a symbol table linked to the " + - describe(Obj, *PltRelSec) + ": " + + describe(*Obj, *PltRelSec) + ": " + toString(PltSymTableOrErr.takeError())); if (Expected StrTabOrErr = @@ -3101,7 +3071,7 @@ Error MipsGOTParser::findPLT(Elf_Dyn_Range DynTable) { PltStrTable = *StrTabOrErr; else return createError("unable to get a string table for the " + - describe(Obj, *PltSymTable) + ": " + + describe(*Obj, *PltSymTable) + ": " + toString(StrTabOrErr.takeError())); return Error::success(); @@ -3431,11 +3401,11 @@ template void ELFDumper::printStackMap() const { } template void ELFDumper::printGroupSections() { - ELFDumperStyle->printGroupSections(ObjF->getELFFile()); + ELFDumperStyle->printGroupSections(); } template void ELFDumper::printAddrsig() { - ELFDumperStyle->printAddrsig(ObjF->getELFFile()); + ELFDumperStyle->printAddrsig(); } 
static inline void printFields(formatted_raw_ostream &OS, StringRef Str1, @@ -3448,34 +3418,34 @@ static inline void printFields(formatted_raw_ostream &OS, StringRef Str1, } template -static std::string getSectionHeadersNumString(const ELFFile *Obj, +static std::string getSectionHeadersNumString(const ELFFile &Obj, StringRef FileName) { - const typename ELFT::Ehdr *ElfHeader = Obj->getHeader(); + const typename ELFT::Ehdr *ElfHeader = Obj.getHeader(); if (ElfHeader->e_shnum != 0) return to_string(ElfHeader->e_shnum); - ArrayRef Arr = cantFail(Obj->sections()); + ArrayRef Arr = cantFail(Obj.sections()); if (Arr.empty()) return "0"; return "0 (" + to_string(Arr[0].sh_size) + ")"; } template -static std::string getSectionHeaderTableIndexString(const ELFFile *Obj, +static std::string getSectionHeaderTableIndexString(const ELFFile &Obj, StringRef FileName) { - const typename ELFT::Ehdr *ElfHeader = Obj->getHeader(); + const typename ELFT::Ehdr *ElfHeader = Obj.getHeader(); if (ElfHeader->e_shstrndx != SHN_XINDEX) return to_string(ElfHeader->e_shstrndx); - ArrayRef Arr = cantFail(Obj->sections()); + ArrayRef Arr = cantFail(Obj.sections()); if (Arr.empty()) return "65535 (corrupt: out of range)"; return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) + ")"; } -template void GNUStyle::printFileHeaders(const ELFO *Obj) { - const Elf_Ehdr *e = Obj->getHeader(); +template void GNUStyle::printFileHeaders() { + const Elf_Ehdr *e = this->Obj.getHeader(); OS << "ELF Header:\n"; OS << " Magic: "; std::string Str; @@ -3529,9 +3499,9 @@ template void GNUStyle::printFileHeaders(const ELFO *Obj) { printFields(OS, "Number of program headers:", Str); Str = to_string(e->e_shentsize) + " (bytes)"; printFields(OS, "Size of section headers:", Str); - Str = getSectionHeadersNumString(Obj, this->FileName); + Str = getSectionHeadersNumString(this->Obj, this->FileName); printFields(OS, "Number of section headers:", Str); - Str = getSectionHeaderTableIndexString(Obj, 
this->FileName); + Str = getSectionHeaderTableIndexString(this->Obj, this->FileName); printFields(OS, "Section header string table index:", Str); } @@ -3553,7 +3523,7 @@ struct GroupSection { }; template -std::vector getGroups(const ELFFile *Obj, +std::vector getGroups(const ELFFile &Obj, StringRef FileName) { using Elf_Shdr = typename ELFT::Shdr; using Elf_Sym = typename ELFT::Sym; @@ -3561,21 +3531,21 @@ std::vector getGroups(const ELFFile *Obj, std::vector Ret; uint64_t I = 0; - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + for (const Elf_Shdr &Sec : cantFail(Obj.sections())) { ++I; if (Sec.sh_type != ELF::SHT_GROUP) continue; const Elf_Shdr *Symtab = - unwrapOrError(FileName, Obj->getSection(Sec.sh_link)); + unwrapOrError(FileName, Obj.getSection(Sec.sh_link)); StringRef StrTable = - unwrapOrError(FileName, Obj->getStringTableForSymtab(*Symtab)); + unwrapOrError(FileName, Obj.getStringTableForSymtab(*Symtab)); const Elf_Sym *Sym = unwrapOrError( - FileName, Obj->template getEntry(Symtab, Sec.sh_info)); + FileName, Obj.template getEntry(Symtab, Sec.sh_info)); auto Data = unwrapOrError( - FileName, Obj->template getSectionContentsAsArray(&Sec)); + FileName, Obj.template getSectionContentsAsArray(&Sec)); - StringRef Name = unwrapOrError(FileName, Obj->getSectionName(&Sec)); + StringRef Name = unwrapOrError(FileName, Obj.getSectionName(&Sec)); StringRef Signature = StrTable.data() + Sym->st_name; Ret.push_back({Name, maybeDemangle(Signature), @@ -3588,8 +3558,8 @@ std::vector getGroups(const ELFFile *Obj, std::vector &GM = Ret.back().Members; for (uint32_t Ndx : Data.slice(1)) { - auto Sec = unwrapOrError(FileName, Obj->getSection(Ndx)); - const StringRef Name = unwrapOrError(FileName, Obj->getSectionName(Sec)); + auto Sec = unwrapOrError(FileName, Obj.getSection(Ndx)); + const StringRef Name = unwrapOrError(FileName, Obj.getSectionName(Sec)); GM.push_back({Name, Ndx}); } } @@ -3607,8 +3577,8 @@ mapSectionsToGroups(ArrayRef Groups) { } // namespace 
-template void GNUStyle::printGroupSections(const ELFO *Obj) { - std::vector V = getGroups(Obj, this->FileName); +template void GNUStyle::printGroupSections() { + std::vector V = getGroups(this->Obj, this->FileName); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { OS << "\n" @@ -3634,17 +3604,15 @@ template void GNUStyle::printGroupSections(const ELFO *Obj) { } template -void GNUStyle::printRelReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rel &R, - unsigned RelIndex) { - printRelRelaReloc(Obj, SecIndex, SymTab, R, RelIndex); +void GNUStyle::printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, + const Elf_Rel &R, unsigned RelIndex) { + printRelRelaReloc(SecIndex, SymTab, R, RelIndex); } template -void GNUStyle::printRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rela &R, - unsigned RelIndex) { - printRelRelaReloc(Obj, SecIndex, SymTab, R, RelIndex); +void GNUStyle::printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, + const Elf_Rela &R, unsigned RelIndex) { + printRelRelaReloc(SecIndex, SymTab, R, RelIndex); } template void GNUStyle::printRelrReloc(const Elf_Relr &R) { @@ -3653,7 +3621,7 @@ template void GNUStyle::printRelrReloc(const Elf_Relr &R) { template template -void GNUStyle::printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, +void GNUStyle::printRelRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, const RelTy &R, unsigned RelIndex) { Expected> Target = @@ -3663,7 +3631,7 @@ void GNUStyle::printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, "unable to print relocation " + Twine(RelIndex) + " in section " + Twine(SecIndex) + ": " + toString(Target.takeError()))); else - printRelRelaReloc(Obj, /*Sym=*/Target->first, /*Name=*/Target->second, R); + printRelRelaReloc(/*Sym=*/Target->first, /*Name=*/Target->second, R); } template @@ -3678,8 +3646,8 @@ static Optional getAddend(const typename ELFT::Rel &) { template template -void 
GNUStyle::printRelRelaReloc(const ELFO *Obj, const Elf_Sym *Sym, - StringRef SymbolName, const RelTy &R) { +void GNUStyle::printRelRelaReloc(const Elf_Sym *Sym, StringRef SymbolName, + const RelTy &R) { // First two fields are bit width dependent. The rest of them are fixed width. unsigned Bias = ELFT::Is64Bits ? 8 : 0; Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias}; @@ -3689,7 +3657,7 @@ void GNUStyle::printRelRelaReloc(const ELFO *Obj, const Elf_Sym *Sym, Fields[1].Str = to_string(format_hex_no_prefix(R.r_info, Width)); SmallString<32> RelocName; - Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName); + this->Obj.getRelocationTypeName(R.getType(this->Obj.isMips64EL()), RelocName); Fields[2].Str = RelocName.c_str(); if (Sym) @@ -3744,14 +3712,14 @@ static bool isRelocationSec(const typename ELFT::Shdr &Sec) { Sec.sh_type == ELF::SHT_ANDROID_RELR; } -template void GNUStyle::printRelocations(const ELFO *Obj) { +template void GNUStyle::printRelocations() { auto GetEntriesNum = [&](const Elf_Shdr &Sec) -> Expected { // Android's packed relocation section needs to be unpacked first // to get the actual number of entries. 
if (Sec.sh_type == ELF::SHT_ANDROID_REL || Sec.sh_type == ELF::SHT_ANDROID_RELA) { Expected> RelasOrErr = - Obj->android_relas(&Sec); + this->Obj.android_relas(&Sec); if (!RelasOrErr) return RelasOrErr.takeError(); return RelasOrErr->size(); @@ -3759,17 +3727,17 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { if (!opts::RawRelr && (Sec.sh_type == ELF::SHT_RELR || Sec.sh_type == ELF::SHT_ANDROID_RELR)) { - Expected RelrsOrErr = Obj->relrs(&Sec); + Expected RelrsOrErr = this->Obj.relrs(&Sec); if (!RelrsOrErr) return RelrsOrErr.takeError(); - return Obj->decode_relrs(*RelrsOrErr).size(); + return this->Obj.decode_relrs(*RelrsOrErr).size(); } return Sec.getEntityCount(); }; bool HasRelocSections = false; - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (!isRelocationSec(Sec)) continue; HasRelocSections = true; @@ -3779,16 +3747,16 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { EntriesNum = std::to_string(*NumOrErr); else this->reportUniqueWarning(createError( - "unable to get the number of relocations in " + describe(Obj, Sec) + - ": " + toString(NumOrErr.takeError()))); + "unable to get the number of relocations in " + + describe(this->Obj, Sec) + ": " + toString(NumOrErr.takeError()))); uintX_t Offset = Sec.sh_offset; - StringRef Name = this->getPrintableSectionName(Obj, Sec); + StringRef Name = this->getPrintableSectionName(Sec); OS << "\nRelocation section '" << Name << "' at offset 0x" << to_hexString(Offset, false) << " contains " << EntriesNum << " entries:\n"; printRelocHeader(Sec.sh_type); - this->printRelocationsHelper(Obj, Sec); + this->printRelocationsHelper(Sec); } if (!HasRelocSections) OS << "\nThere are no relocations in this file.\n"; @@ -3846,13 +3814,12 @@ static void printSectionDescription(formatted_raw_ostream &OS, OS << "p (processor specific)\n"; } -template -void GNUStyle::printSectionHeaders(const ELFO *Obj) { +template void 
GNUStyle::printSectionHeaders() { unsigned Bias = ELFT::Is64Bits ? 0 : 8; - ArrayRef Sections = cantFail(Obj->sections()); + ArrayRef Sections = cantFail(this->Obj.sections()); OS << "There are " << to_string(Sections.size()) << " section headers, starting at offset " - << "0x" << to_hexString(Obj->getHeader()->e_shoff, false) << ":\n\n"; + << "0x" << to_hexString(this->Obj.getHeader()->e_shoff, false) << ":\n\n"; OS << "Section Headers:\n"; Field Fields[11] = { {"[Nr]", 2}, {"Name", 7}, {"Type", 25}, @@ -3864,8 +3831,8 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { OS << "\n"; StringRef SecStrTable; - if (Expected SecStrTableOrErr = - Obj->getSectionStringTable(Sections, this->dumper()->WarningHandler)) + if (Expected SecStrTableOrErr = this->Obj.getSectionStringTable( + Sections, this->dumper()->WarningHandler)) SecStrTable = *SecStrTableOrErr; else this->reportUniqueWarning(SecStrTableOrErr.takeError()); @@ -3877,15 +3844,15 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { Fields[1].Str = ""; else Fields[1].Str = std::string(unwrapOrError( - this->FileName, Obj->getSectionName(&Sec, SecStrTable))); + this->FileName, this->Obj.getSectionName(&Sec, SecStrTable))); Fields[2].Str = - getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type); + getSectionTypeString(this->Obj.getHeader()->e_machine, Sec.sh_type); Fields[3].Str = to_string(format_hex_no_prefix(Sec.sh_addr, ELFT::Is64Bits ? 
16 : 8)); Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6)); Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6)); Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2)); - Fields[7].Str = getGNUFlags(Obj->getHeader()->e_machine, Sec.sh_flags); + Fields[7].Str = getGNUFlags(this->Obj.getHeader()->e_machine, Sec.sh_flags); Fields[8].Str = to_string(Sec.sh_link); Fields[9].Str = to_string(Sec.sh_info); Fields[10].Str = to_string(Sec.sh_addralign); @@ -3905,16 +3872,15 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { OS << "\n"; ++SectionIndex; } - printSectionDescription(OS, Obj->getHeader()->e_machine); + printSectionDescription(OS, this->Obj.getHeader()->e_machine); } template -void GNUStyle::printSymtabMessage(const ELFO *Obj, const Elf_Shdr *Symtab, - size_t Entries, +void GNUStyle::printSymtabMessage(const Elf_Shdr *Symtab, size_t Entries, bool NonVisibilityBitsUsed) { StringRef Name; if (Symtab) - Name = this->getPrintableSectionName(Obj, *Symtab); + Name = this->getPrintableSectionName(*Symtab); if (!Name.empty()) OS << "\nSymbol table '" << Name << "'"; else @@ -3932,8 +3898,7 @@ void GNUStyle::printSymtabMessage(const ELFO *Obj, const Elf_Shdr *Symtab, } template -std::string GNUStyle::getSymbolSectionNdx(const ELFO *Obj, - const Elf_Sym *Symbol, +std::string GNUStyle::getSymbolSectionNdx(const Elf_Sym *Symbol, const Elf_Sym *FirstSym) { unsigned SectionIndex = Symbol->st_shndx; switch (SectionIndex) { @@ -3976,8 +3941,7 @@ std::string GNUStyle::getSymbolSectionNdx(const ELFO *Obj, } template -void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, - const Elf_Sym *FirstSym, +void GNUStyle::printSymbol(const Elf_Sym *Symbol, const Elf_Sym *FirstSym, Optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed) { unsigned Bias = ELFT::Is64Bits ? 
8 : 0; @@ -3989,7 +3953,7 @@ void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, Fields[2].Str = to_string(format_decimal(Symbol->st_size, 5)); unsigned char SymbolType = Symbol->getType(); - if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU && + if (this->Obj.getHeader()->e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) Fields[3].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes)); else @@ -4004,7 +3968,7 @@ void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, " [st_other, 2)) + ">]"; Fields[6].Column += NonVisibilityBitsUsed ? 13 : 0; - Fields[6].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym); + Fields[6].Str = getSymbolSectionNdx(Symbol, FirstSym); Fields[7].Str = this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic); @@ -4014,9 +3978,8 @@ void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, } template -void GNUStyle::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, - uint32_t Sym, StringRef StrTable, - uint32_t Bucket) { +void GNUStyle::printHashedSymbol(const Elf_Sym *FirstSym, uint32_t Sym, + StringRef StrTable, uint32_t Bucket) { unsigned Bias = ELFT::Is64Bits ? 
8 : 0; Field Fields[9] = {0, 6, 11, 20 + Bias, 25 + Bias, 34 + Bias, 41 + Bias, 49 + Bias, 53 + Bias}; @@ -4029,7 +3992,7 @@ void GNUStyle::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5)); unsigned char SymbolType = Symbol->getType(); - if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU && + if (this->Obj.getHeader()->e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) Fields[4].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes)); else @@ -4039,7 +4002,7 @@ void GNUStyle::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings)); Fields[6].Str = printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities)); - Fields[7].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym); + Fields[7].Str = getSymbolSectionNdx(Symbol, FirstSym); Fields[8].Str = this->dumper()->getFullSymbolName(Symbol, StrTable, true); for (auto &Entry : Fields) @@ -4048,8 +4011,7 @@ void GNUStyle::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, } template -void GNUStyle::printSymbols(const ELFO *Obj, bool PrintSymbols, - bool PrintDynamicSymbols) { +void GNUStyle::printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) { if (!PrintSymbols && !PrintDynamicSymbols) return; // GNU readelf prints both the .dynsym and .symtab with --symbols. 
@@ -4059,8 +4021,7 @@ void GNUStyle::printSymbols(const ELFO *Obj, bool PrintSymbols, } template -void GNUStyle::printHashTableSymbols(const ELFO *Obj, - const Elf_Hash &SysVHash) { +void GNUStyle::printHashTableSymbols(const Elf_Hash &SysVHash) { StringRef StringTable = this->dumper()->getDynamicStringTable(); if (StringTable.empty()) return; @@ -4100,15 +4061,14 @@ void GNUStyle::printHashTableSymbols(const ELFO *Obj, break; } - printHashedSymbol(Obj, FirstSym, Ch, StringTable, Buc); + printHashedSymbol(FirstSym, Ch, StringTable, Buc); Visited[Ch] = true; } } } template -void GNUStyle::printGnuHashTableSymbols(const ELFO *Obj, - const Elf_GnuHash &GnuHash) { +void GNUStyle::printGnuHashTableSymbols(const Elf_GnuHash &GnuHash) { StringRef StringTable = this->dumper()->getDynamicStringTable(); if (StringTable.empty()) return; @@ -4132,7 +4092,7 @@ void GNUStyle::printGnuHashTableSymbols(const ELFO *Obj, uint32_t GnuHashable = Index - GnuHash.symndx; // Print whole chain while (true) { - printHashedSymbol(Obj, FirstSym, Index++, StringTable, Buc); + printHashedSymbol(FirstSym, Index++, StringTable, Buc); // Chain ends at symbol with stopper bit if ((GnuHash.values(DynSyms.size())[GnuHashable++] & 1) == 1) break; @@ -4140,13 +4100,13 @@ void GNUStyle::printGnuHashTableSymbols(const ELFO *Obj, } } -template void GNUStyle::printHashSymbols(const ELFO *Obj) { +template void GNUStyle::printHashSymbols() { if (const Elf_Hash *SysVHash = this->dumper()->getHashTable()) { OS << "\n Symbol table of .hash for image:\n"; - if (Error E = checkHashTable(Obj, SysVHash)) + if (Error E = checkHashTable(this->Obj, SysVHash)) this->reportUniqueWarning(std::move(E)); else - printHashTableSymbols(Obj, *SysVHash); + printHashTableSymbols(*SysVHash); } // Try printing the .gnu.hash table. 
@@ -4158,10 +4118,10 @@ template void GNUStyle::printHashSymbols(const ELFO *Obj) { OS << " Num Buc: Value Size Type Bind Vis Ndx Name"; OS << "\n"; - if (Error E = checkGNUHashTable(Obj, GnuHash)) + if (Error E = checkGNUHashTable(this->Obj, GnuHash)) this->reportUniqueWarning(std::move(E)); else - printGnuHashTableSymbols(Obj, *GnuHash); + printGnuHashTableSymbols(*GnuHash); } } @@ -4247,21 +4207,19 @@ static bool checkPTDynamic(const typename ELFT::Phdr &Phdr, template void GNUStyle::printProgramHeaders( - const ELFO *Obj, bool PrintProgramHeaders, - cl::boolOrDefault PrintSectionMapping) { + bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { if (PrintProgramHeaders) - printProgramHeaders(Obj); + printProgramHeaders(); // Display the section mapping along with the program headers, unless // -section-mapping is explicitly set to false. if (PrintSectionMapping != cl::BOU_FALSE) - printSectionMapping(Obj); + printSectionMapping(); } -template -void GNUStyle::printProgramHeaders(const ELFO *Obj) { +template void GNUStyle::printProgramHeaders() { unsigned Bias = ELFT::Is64Bits ? 8 : 0; - const Elf_Ehdr *Header = Obj->getHeader(); + const Elf_Ehdr *Header = this->Obj.getHeader(); Field Fields[8] = {2, 17, 26, 37 + Bias, 48 + Bias, 56 + Bias, 64 + Bias, 68 + Bias}; OS << "\nElf file type is " @@ -4280,7 +4238,7 @@ void GNUStyle::printProgramHeaders(const ELFO *Obj) { unsigned Width = ELFT::Is64Bits ? 18 : 10; unsigned SizeWidth = ELFT::Is64Bits ? 
8 : 7; - Expected> PhdrsOrErr = Obj->program_headers(); + Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning(createError("unable to dump program headers: " + toString(PhdrsOrErr.takeError()))); @@ -4307,15 +4265,15 @@ void GNUStyle::printProgramHeaders(const ELFO *Obj) { this->FileName); }; - if (Phdr.p_offset >= Obj->getBufSize()) { + if (Phdr.p_offset >= this->Obj.getBufSize()) { ReportBadInterp("it goes past the end of the file (0x" + - Twine::utohexstr(Obj->getBufSize()) + ")"); + Twine::utohexstr(this->Obj.getBufSize()) + ")"); continue; } const char *Data = - reinterpret_cast(Obj->base()) + Phdr.p_offset; - size_t MaxSize = Obj->getBufSize() - Phdr.p_offset; + reinterpret_cast(this->Obj.base()) + Phdr.p_offset; + size_t MaxSize = this->Obj.getBufSize() - Phdr.p_offset; size_t Len = strnlen(Data, MaxSize); if (Len == MaxSize) { ReportBadInterp("it is not null-terminated"); @@ -4329,13 +4287,12 @@ void GNUStyle::printProgramHeaders(const ELFO *Obj) { } } -template -void GNUStyle::printSectionMapping(const ELFO *Obj) { +template void GNUStyle::printSectionMapping() { OS << "\n Section to Segment mapping:\n Segment Sections...\n"; DenseSet BelongsToSegment; int Phnum = 0; - Expected> PhdrsOrErr = Obj->program_headers(); + Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning(createError( "can't read program headers to build section to segment mapping: " + @@ -4347,7 +4304,7 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { std::string Sections; OS << format(" %2.2d ", Phnum++); // Check if each section is in a segment and then print mapping. 
- for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (Sec.sh_type == ELF::SHT_NULL) continue; @@ -4357,7 +4314,8 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { if (checkTLSSections(Phdr, Sec) && checkOffsets(Phdr, Sec) && checkVMA(Phdr, Sec) && checkPTDynamic(Phdr, Sec)) { Sections += - unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + + unwrapOrError(this->FileName, this->Obj.getSectionName(&Sec)) + .str() + " "; BelongsToSegment.insert(&Sec); } @@ -4368,10 +4326,11 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { // Display sections that do not belong to a segment. std::string Sections; - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (BelongsToSegment.find(&Sec) == BelongsToSegment.end()) Sections += - unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + ' '; + unwrapOrError(this->FileName, this->Obj.getSectionName(&Sec)).str() + + ' '; } if (!Sections.empty()) { OS << " None " << Sections << '\n'; @@ -4386,10 +4345,10 @@ template struct RelSymbol { }; template -RelSymbol getSymbolForReloc(const ELFFile *Obj, StringRef FileName, +RelSymbol getSymbolForReloc(const ELFFile &Obj, StringRef FileName, const ELFDumper *Dumper, const RelTy &Reloc) { - uint32_t SymIndex = Reloc.getSymbol(Obj->isMips64EL()); + uint32_t SymIndex = Reloc.getSymbol(Obj.isMips64EL()); auto WarnAndReturn = [&](const typename ELFT::Sym *Sym, const Twine &Reason) -> RelSymbol { reportWarning( @@ -4424,21 +4383,22 @@ RelSymbol getSymbolForReloc(const ELFFile *Obj, StringRef FileName, template template -void GNUStyle::printDynamicRelocation(const ELFO *Obj, const RelTy &R) { - RelSymbol S = getSymbolForReloc(Obj, this->FileName, this->dumper(), R); - printRelRelaReloc(Obj, S.Sym, S.Name, R); +void GNUStyle::printDynamicRelocation(const RelTy &R) { + RelSymbol S = + getSymbolForReloc(this->Obj, this->FileName, 
this->dumper(), R); + printRelRelaReloc(S.Sym, S.Name, R); } template -static size_t getMaxDynamicTagSize(const ELFFile *Obj, +static size_t getMaxDynamicTagSize(const ELFFile &Obj, typename ELFT::DynRange Tags) { size_t Max = 0; for (const typename ELFT::Dyn &Dyn : Tags) - Max = std::max(Max, Obj->getDynamicTagAsString(Dyn.d_tag).size()); + Max = std::max(Max, Obj.getDynamicTagAsString(Dyn.d_tag).size()); return Max; } -template void GNUStyle::printDynamic(const ELFO *Obj) { +template void GNUStyle::printDynamic() { Elf_Dyn_Range Table = this->dumper()->dynamic_table(); if (Table.empty()) return; @@ -4448,12 +4408,12 @@ template void GNUStyle::printDynamic(const ELFO *Obj) { OS << "Dynamic section at offset " << format_hex(reinterpret_cast(DynamicTableRegion.Addr) - - Obj->base(), + this->Obj.base(), 1) << " contains " << Table.size() << " entries:\n"; // The type name is surrounded with round brackets, hence add 2. - size_t MaxTagSize = getMaxDynamicTagSize(Obj, Table) + 2; + size_t MaxTagSize = getMaxDynamicTagSize(this->Obj, Table) + 2; // The "Name/Value" column should be indented from the "Type" column by N // spaces, where N = MaxTagSize - length of "Type" (4) + trailing // space (1) = 3. @@ -4464,15 +4424,14 @@ template void GNUStyle::printDynamic(const ELFO *Obj) { for (auto Entry : Table) { uintX_t Tag = Entry.getTag(); std::string Type = - std::string("(") + Obj->getDynamicTagAsString(Tag).c_str() + ")"; + std::string("(") + this->Obj.getDynamicTagAsString(Tag).c_str() + ")"; std::string Value = this->dumper()->getDynamicEntry(Tag, Entry.getVal()); OS << " " << format_hex(Tag, ELFT::Is64Bits ? 
18 : 10) << format(ValueFmt.c_str(), Type.c_str()) << Value << "\n"; } } -template -void GNUStyle::printDynamicRelocations(const ELFO *Obj) { +template void GNUStyle::printDynamicRelocations() { const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion(); const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion(); const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion(); @@ -4480,70 +4439,70 @@ void GNUStyle::printDynamicRelocations(const ELFO *Obj) { if (DynRelaRegion.Size > 0) { OS << "\n'RELA' relocation section at offset " << format_hex(reinterpret_cast(DynRelaRegion.Addr) - - Obj->base(), + this->Obj.base(), 1) << " contains " << DynRelaRegion.Size << " bytes:\n"; printRelocHeader(ELF::SHT_RELA); for (const Elf_Rela &Rela : this->dumper()->dyn_relas()) - printDynamicRelocation(Obj, Rela); + printDynamicRelocation(Rela); } if (DynRelRegion.Size > 0) { OS << "\n'REL' relocation section at offset " << format_hex(reinterpret_cast(DynRelRegion.Addr) - - Obj->base(), + this->Obj.base(), 1) << " contains " << DynRelRegion.Size << " bytes:\n"; printRelocHeader(ELF::SHT_REL); for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) - printDynamicRelocation(Obj, Rel); + printDynamicRelocation(Rel); } if (DynRelrRegion.Size > 0) { OS << "\n'RELR' relocation section at offset " << format_hex(reinterpret_cast(DynRelrRegion.Addr) - - Obj->base(), + this->Obj.base(), 1) << " contains " << DynRelrRegion.Size << " bytes:\n"; printRelocHeader(ELF::SHT_REL); Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - for (const Elf_Rel &R : Obj->decode_relrs(Relrs)) - printDynamicRelocation(Obj, R); + for (const Elf_Rel &R : this->Obj.decode_relrs(Relrs)) + printDynamicRelocation(R); } if (DynPLTRelRegion.Size) { OS << "\n'PLT' relocation section at offset " << format_hex(reinterpret_cast(DynPLTRelRegion.Addr) - - Obj->base(), + this->Obj.base(), 1) << " contains " << DynPLTRelRegion.Size << " bytes:\n"; if (DynPLTRelRegion.EntSize == 
sizeof(Elf_Rela)) { printRelocHeader(ELF::SHT_RELA); for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Obj, Rela); + printDynamicRelocation(Rela); } else { printRelocHeader(ELF::SHT_REL); for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Obj, Rel); + printDynamicRelocation(Rel); } } } template void GNUStyle::printGNUVersionSectionProlog( - const ELFFile *Obj, const typename ELFT::Shdr *Sec, - const Twine &Label, unsigned EntriesNum) { - StringRef SecName = unwrapOrError(this->FileName, Obj->getSectionName(Sec)); + const typename ELFT::Shdr *Sec, const Twine &Label, unsigned EntriesNum) { + StringRef SecName = + unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)); OS << Label << " section '" << SecName << "' " << "contains " << EntriesNum << " entries:\n"; StringRef SymTabName = ""; Expected SymTabOrErr = - Obj->getSection(Sec->sh_link); + this->Obj.getSection(Sec->sh_link); if (SymTabOrErr) SymTabName = - unwrapOrError(this->FileName, Obj->getSectionName(*SymTabOrErr)); + unwrapOrError(this->FileName, this->Obj.getSectionName(*SymTabOrErr)); else this->reportUniqueWarning(createError("invalid section linked to " + - describe(Obj, *Sec) + ": " + + describe(this->Obj, *Sec) + ": " + toString(SymTabOrErr.takeError()))); OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16) @@ -4552,16 +4511,15 @@ void GNUStyle::printGNUVersionSectionProlog( } template -void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void GNUStyle::printVersionSymbolSection(const Elf_Shdr *Sec) { if (!Sec) return; - printGNUVersionSectionProlog(Obj, Sec, "Version symbols", + printGNUVersionSectionProlog(Sec, "Version symbols", Sec->sh_size / sizeof(Elf_Versym)); Expected> VerTableOrErr = this->dumper()->getVersionTable(Sec, /*SymTab=*/nullptr, - /*StrTab=*/nullptr); + /*StrTab=*/nullptr); if (!VerTableOrErr) { this->reportUniqueWarning(VerTableOrErr.takeError()); return; @@ -4581,9 
+4539,10 @@ void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, this->dumper()->getSymbolVersionByIndex(Ndx, IsDefault); if (!NameOrErr) { if (!NameOrErr) - this->reportUniqueWarning(createError( - "unable to get a version for entry " + Twine(I) + " of " + - describe(Obj, *Sec) + ": " + toString(NameOrErr.takeError()))); + this->reportUniqueWarning( + createError("unable to get a version for entry " + Twine(I) + + " of " + describe(this->Obj, *Sec) + ": " + + toString(NameOrErr.takeError()))); Versions.emplace_back(""); continue; } @@ -4627,12 +4586,11 @@ static std::string versionFlagToString(unsigned Flags) { } template -void GNUStyle::printVersionDefinitionSection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void GNUStyle::printVersionDefinitionSection(const Elf_Shdr *Sec) { if (!Sec) return; - printGNUVersionSectionProlog(Obj, Sec, "Version definition", Sec->sh_info); + printGNUVersionSectionProlog(Sec, "Version definition", Sec->sh_info); Expected> V = this->dumper()->getVersionDefinitions(Sec); if (!V) { @@ -4655,13 +4613,12 @@ void GNUStyle::printVersionDefinitionSection(const ELFFile *Obj, } template -void GNUStyle::printVersionDependencySection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void GNUStyle::printVersionDependencySection(const Elf_Shdr *Sec) { if (!Sec) return; unsigned VerneedNum = Sec->sh_info; - printGNUVersionSectionProlog(Obj, Sec, "Version needs", VerneedNum); + printGNUVersionSectionProlog(Sec, "Version needs", VerneedNum); Expected> V = this->dumper()->getVersionDependencies(Sec); @@ -4796,11 +4753,10 @@ void GNUStyle::printGnuHashHistogram(const Elf_GnuHash &GnuHashTable) { // dynamic symbol table. The table shows the number of hash buckets for // different lengths of chains as an absolute number and percentage of the total // buckets, and the cumulative coverage of symbols for each set of buckets. 
-template -void GNUStyle::printHashHistograms(const ELFFile *Obj) { +template void GNUStyle::printHashHistograms() { // Print histogram for the .hash section. if (const Elf_Hash *HashTable = this->dumper()->getHashTable()) { - if (Error E = checkHashTable(Obj, HashTable)) + if (Error E = checkHashTable(this->Obj, HashTable)) this->reportUniqueWarning(std::move(E)); else printHashHistogram(*HashTable); @@ -4808,20 +4764,18 @@ void GNUStyle::printHashHistograms(const ELFFile *Obj) { // Print histogram for the .gnu.hash section. if (const Elf_GnuHash *GnuHashTable = this->dumper()->getGnuHashTable()) { - if (Error E = checkGNUHashTable(Obj, GnuHashTable)) + if (Error E = checkGNUHashTable(this->Obj, GnuHashTable)) this->reportUniqueWarning(std::move(E)); else printGnuHashHistogram(*GnuHashTable); } } -template -void GNUStyle::printCGProfile(const ELFFile *Obj) { +template void GNUStyle::printCGProfile() { OS << "GNUStyle::printCGProfile not implemented\n"; } -template -void GNUStyle::printAddrsig(const ELFFile *Obj) { +template void GNUStyle::printAddrsig() { reportError(createError("--addrsig: not implemented"), this->FileName); } @@ -5352,8 +5306,7 @@ static void printCoreNote(raw_ostream &OS, const CoreNote &Note) { } } -template -void GNUStyle::printNotes(const ELFFile *Obj) { +template void GNUStyle::printNotes() { auto PrintHeader = [&](Optional SecName, const typename ELFT::Off Offset, const typename ELFT::Addr Size) { @@ -5385,7 +5338,7 @@ void GNUStyle::printNotes(const ELFFile *Obj) { } else if (Name == "AMDGPU") { OS << getAMDGPUNoteTypeName(Type) << '\n'; } else { - StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE + StringRef NoteType = this->Obj.getHeader()->e_type == ELF::ET_CORE ? 
getCoreNoteTypeName(Type) : getGenericNoteTypeName(Type); if (!NoteType.empty()) @@ -5425,21 +5378,21 @@ void GNUStyle::printNotes(const ELFFile *Obj) { } }; - ArrayRef Sections = cantFail(Obj->sections()); - if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { + ArrayRef Sections = cantFail(this->Obj.sections()); + if (this->Obj.getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { for (const auto &S : Sections) { if (S.sh_type != SHT_NOTE) continue; - PrintHeader(expectedToOptional(Obj->getSectionName(&S)), S.sh_offset, + PrintHeader(expectedToOptional(this->Obj.getSectionName(&S)), S.sh_offset, S.sh_size); Error Err = Error::success(); - for (auto Note : Obj->notes(S, Err)) + for (auto Note : this->Obj.notes(S, Err)) ProcessNote(Note); if (Err) reportError(std::move(Err), this->FileName); } } else { - Expected> PhdrsOrErr = Obj->program_headers(); + Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning(createError( "unable to read program headers to locate the PT_NOTE segment: " + @@ -5452,7 +5405,7 @@ void GNUStyle::printNotes(const ELFFile *Obj) { continue; PrintHeader(/*SecName=*/None, P.p_offset, P.p_filesz); Error Err = Error::success(); - for (auto Note : Obj->notes(P, Err)) + for (auto Note : this->Obj.notes(P, Err)) ProcessNote(Note); if (Err) reportError(std::move(Err), this->FileName); @@ -5460,14 +5413,12 @@ void GNUStyle::printNotes(const ELFFile *Obj) { } } -template -void GNUStyle::printELFLinkerOptions(const ELFFile *Obj) { +template void GNUStyle::printELFLinkerOptions() { OS << "printELFLinkerOptions not implemented!\n"; } template void DumpStyle::printDependentLibsHelper( - const ELFFile *Obj, function_ref OnSectionStart, function_ref OnLibEntry) { auto Warn = [this](unsigned SecNdx, StringRef Msg) { @@ -5477,14 +5428,14 @@ void DumpStyle::printDependentLibsHelper( }; unsigned I = -1; - for (const Elf_Shdr &Shdr : cantFail(Obj->sections())) { + for (const Elf_Shdr &Shdr : 
cantFail(Obj.sections())) { ++I; if (Shdr.sh_type != ELF::SHT_LLVM_DEPENDENT_LIBRARIES) continue; OnSectionStart(Shdr); - Expected> ContentsOrErr = Obj->getSectionContents(&Shdr); + Expected> ContentsOrErr = Obj.getSectionContents(&Shdr); if (!ContentsOrErr) { Warn(I, toString(ContentsOrErr.takeError())); continue; @@ -5505,8 +5456,7 @@ void DumpStyle::printDependentLibsHelper( } template -void DumpStyle::printRelocationsHelper(const ELFFile *Obj, - const Elf_Shdr &Sec) { +void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { auto Warn = [&](Error &&E, const Twine &Prefix = "unable to read relocations from") { this->reportUniqueWarning(createError(Prefix + " " + describe(Obj, Sec) + @@ -5518,7 +5468,7 @@ void DumpStyle::printRelocationsHelper(const ELFFile *Obj, // a symbol table. const Elf_Shdr *SymTab; if (Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_RELR) { - Expected SymTabOrErr = Obj->getSection(Sec.sh_link); + Expected SymTabOrErr = Obj.getSection(Sec.sh_link); if (!SymTabOrErr) { Warn(SymTabOrErr.takeError(), "unable to locate a symbol table for"); return; @@ -5526,28 +5476,28 @@ void DumpStyle::printRelocationsHelper(const ELFFile *Obj, SymTab = *SymTabOrErr; } - unsigned SecNdx = &Sec - &cantFail(Obj->sections()).front(); + unsigned SecNdx = &Sec - &cantFail(Obj.sections()).front(); unsigned RelNdx = 0; switch (Sec.sh_type) { case ELF::SHT_REL: - if (Expected RangeOrErr = Obj->rels(&Sec)) { + if (Expected RangeOrErr = Obj.rels(&Sec)) { for (const Elf_Rel &R : *RangeOrErr) - printRelReloc(Obj, SecNdx, SymTab, R, ++RelNdx); + printRelReloc(SecNdx, SymTab, R, ++RelNdx); } else { Warn(RangeOrErr.takeError()); } break; case ELF::SHT_RELA: - if (Expected RangeOrErr = Obj->relas(&Sec)) { + if (Expected RangeOrErr = Obj.relas(&Sec)) { for (const Elf_Rela &R : *RangeOrErr) - printRelaReloc(Obj, SecNdx, SymTab, R, ++RelNdx); + printRelaReloc(SecNdx, SymTab, R, ++RelNdx); } else { Warn(RangeOrErr.takeError()); } break; case ELF::SHT_RELR: 
case ELF::SHT_ANDROID_RELR: { - Expected RangeOrErr = Obj->relrs(&Sec); + Expected RangeOrErr = Obj.relrs(&Sec); if (!RangeOrErr) { Warn(RangeOrErr.takeError()); break; @@ -5558,15 +5508,15 @@ void DumpStyle::printRelocationsHelper(const ELFFile *Obj, break; } - for (const Elf_Rel &R : Obj->decode_relrs(*RangeOrErr)) - printRelReloc(Obj, SecNdx, /*SymTab=*/nullptr, R, ++RelNdx); + for (const Elf_Rel &R : Obj.decode_relrs(*RangeOrErr)) + printRelReloc(SecNdx, /*SymTab=*/nullptr, R, ++RelNdx); break; } case ELF::SHT_ANDROID_REL: case ELF::SHT_ANDROID_RELA: - if (Expected> RelasOrErr = Obj->android_relas(&Sec)) { + if (Expected> RelasOrErr = Obj.android_relas(&Sec)) { for (const Elf_Rela &R : *RelasOrErr) - printRelaReloc(Obj, SecNdx, SymTab, R, ++RelNdx); + printRelaReloc(SecNdx, SymTab, R, ++RelNdx); } else { Warn(RelasOrErr.takeError()); } @@ -5575,11 +5525,10 @@ void DumpStyle::printRelocationsHelper(const ELFFile *Obj, } template -StringRef DumpStyle::getPrintableSectionName(const ELFFile *Obj, - const Elf_Shdr &Sec) const { +StringRef DumpStyle::getPrintableSectionName(const Elf_Shdr &Sec) const { StringRef Name = ""; if (Expected SecNameOrErr = - Obj->getSectionName(&Sec, this->dumper()->WarningHandler)) + Obj.getSectionName(&Sec, this->dumper()->WarningHandler)) Name = *SecNameOrErr; else this->reportUniqueWarning(createError("unable to get the name of " + @@ -5588,8 +5537,7 @@ StringRef DumpStyle::getPrintableSectionName(const ELFFile *Obj, return Name; } -template -void GNUStyle::printDependentLibs(const ELFFile *Obj) { +template void GNUStyle::printDependentLibs() { bool SectionStarted = false; struct NameOffset { StringRef Name; @@ -5613,13 +5561,13 @@ void GNUStyle::printDependentLibs(const ELFFile *Obj) { PrintSection(); SectionStarted = true; Current.Offset = Shdr.sh_offset; - Current.Name = this->getPrintableSectionName(Obj, Shdr); + Current.Name = this->getPrintableSectionName(Shdr); }; auto OnLibEntry = [&](StringRef Lib, uint64_t Offset) { 
SecEntries.push_back(NameOffset{Lib, Offset}); }; - this->printDependentLibsHelper(Obj, OnSectionStart, OnLibEntry); + this->printDependentLibsHelper(OnSectionStart, OnLibEntry); if (SectionStarted) PrintSection(); } @@ -5788,7 +5736,7 @@ void DumpStyle::printNonRelocatableStackSizes( if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) { reportUniqueWarning(createStringError( object_error::parse_failed, - describe(EF, *ElfSec) + + describe(*EF, *ElfSec) + " ended while trying to extract a stack size entry")); break; } @@ -5835,7 +5783,7 @@ void DumpStyle::printRelocatableStackSizes( if (!RelSecOrErr) { reportUniqueWarning( createStringError(object_error::parse_failed, - describe(Obj->getELFFile(), *ElfSec) + + describe(*Obj->getELFFile(), *ElfSec) + ": failed to get a relocated section: " + toString(RelSecOrErr.takeError()))); continue; @@ -5891,7 +5839,7 @@ void DumpStyle::printRelocatableStackSizes( Obj->getSection(RelocSec.getRawDataRefImpl()); reportUniqueWarning(createStringError( object_error::parse_failed, - describe(EF, *RelocSecShdr) + + describe(*EF, *RelocSecShdr) + " contains an unsupported relocation with index " + Twine(I) + ": " + EF->getRelocationTypeName(Reloc.getType()))); continue; @@ -5989,8 +5937,7 @@ void GNUStyle::printMipsGOT(const MipsGOTParser &Parser) { OS.PadToColumn(40 + 3 * Bias); OS << printEnum(Sym->getType(), makeArrayRef(ElfSymbolTypes)); OS.PadToColumn(48 + 3 * Bias); - OS << getSymbolSectionNdx(Parser.Obj, Sym, - this->dumper()->dynamic_symbols().begin()); + OS << getSymbolSectionNdx(Sym, this->dumper()->dynamic_symbols().begin()); OS.PadToColumn(52 + 3 * Bias); OS << SymName << "\n"; } @@ -6039,8 +5986,7 @@ void GNUStyle::printMipsPLT(const MipsGOTParser &Parser) { OS.PadToColumn(29 + 3 * Bias); OS << printEnum(Sym->getType(), makeArrayRef(ElfSymbolTypes)); OS.PadToColumn(37 + 3 * Bias); - OS << getSymbolSectionNdx(Parser.Obj, Sym, - this->dumper()->dynamic_symbols().begin()); + OS << getSymbolSectionNdx(Sym, 
this->dumper()->dynamic_symbols().begin()); OS.PadToColumn(41 + 3 * Bias); OS << SymName << "\n"; } @@ -6101,8 +6047,8 @@ void GNUStyle::printMipsABIFlags(const ELFObjectFile *ObjF) { OS << "\n"; } -template void LLVMStyle::printFileHeaders(const ELFO *Obj) { - const Elf_Ehdr *E = Obj->getHeader(); +template void LLVMStyle::printFileHeaders() { + const Elf_Ehdr *E = this->Obj.getHeader(); { DictScope D(W, "ElfHeader"); { @@ -6155,16 +6101,15 @@ template void LLVMStyle::printFileHeaders(const ELFO *Obj) { W.printNumber("ProgramHeaderCount", E->e_phnum); W.printNumber("SectionHeaderEntrySize", E->e_shentsize); W.printString("SectionHeaderCount", - getSectionHeadersNumString(Obj, this->FileName)); + getSectionHeadersNumString(this->Obj, this->FileName)); W.printString("StringTableSectionIndex", - getSectionHeaderTableIndexString(Obj, this->FileName)); + getSectionHeaderTableIndexString(this->Obj, this->FileName)); } } -template -void LLVMStyle::printGroupSections(const ELFO *Obj) { +template void LLVMStyle::printGroupSections() { DictScope Lists(W, "Groups"); - std::vector V = getGroups(Obj, this->FileName); + std::vector V = getGroups(this->Obj, this->FileName); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { DictScope D(W, "Group"); @@ -6193,35 +6138,33 @@ void LLVMStyle::printGroupSections(const ELFO *Obj) { W.startLine() << "There are no group sections in the file.\n"; } -template void LLVMStyle::printRelocations(const ELFO *Obj) { +template void LLVMStyle::printRelocations() { ListScope D(W, "Relocations"); - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { if (!isRelocationSec(Sec)) continue; - StringRef Name = this->getPrintableSectionName(Obj, Sec); - unsigned SecNdx = &Sec - &cantFail(Obj->sections()).front(); + StringRef Name = this->getPrintableSectionName(Sec); + unsigned SecNdx = &Sec - &cantFail(this->Obj.sections()).front(); W.startLine() << "Section (" << 
SecNdx << ") " << Name << " {\n"; W.indent(); - this->printRelocationsHelper(Obj, Sec); + this->printRelocationsHelper(Sec); W.unindent(); W.startLine() << "}\n"; } } template -void LLVMStyle::printRelReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rel &R, - unsigned RelIndex) { - printRelRelaReloc(Obj, SecIndex, R, RelIndex, SymTab); +void LLVMStyle::printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, + const Elf_Rel &R, unsigned RelIndex) { + printRelRelaReloc(SecIndex, R, RelIndex, SymTab); } template -void LLVMStyle::printRelaReloc(const ELFO *Obj, unsigned SecIndex, - const Elf_Shdr *SymTab, const Elf_Rela &R, - unsigned RelIndex) { - printRelRelaReloc(Obj, SecIndex, R, RelIndex, SymTab); +void LLVMStyle::printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, + const Elf_Rela &R, unsigned RelIndex) { + printRelRelaReloc(SecIndex, R, RelIndex, SymTab); } template void LLVMStyle::printRelrReloc(const Elf_Relr &R) { @@ -6230,8 +6173,8 @@ template void LLVMStyle::printRelrReloc(const Elf_Relr &R) { template template -void LLVMStyle::printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, - const RelTy &Rel, unsigned RelIndex, +void LLVMStyle::printRelRelaReloc(unsigned SecIndex, const RelTy &Rel, + unsigned RelIndex, const Elf_Shdr *SymTab) { Expected> Target = this->dumper()->getRelocationTarget(SymTab, Rel); @@ -6244,15 +6187,16 @@ void LLVMStyle::printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, std::string TargetName = Target->second; SmallString<32> RelocName; - Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName); + this->Obj.getRelocationTypeName(Rel.getType(this->Obj.isMips64EL()), + RelocName); uintX_t Addend = getAddend(Rel).getValueOr(0); if (opts::ExpandRelocs) { DictScope Group(W, "Relocation"); W.printHex("Offset", Rel.r_offset); - W.printNumber("Type", RelocName, (int)Rel.getType(Obj->isMips64EL())); + W.printNumber("Type", RelocName, (int)Rel.getType(this->Obj.isMips64EL())); 
W.printNumber("Symbol", !TargetName.empty() ? TargetName : "-", - Rel.getSymbol(Obj->isMips64EL())); + Rel.getSymbol(this->Obj.isMips64EL())); W.printHex("Addend", Addend); } else { raw_ostream &OS = W.startLine(); @@ -6262,21 +6206,20 @@ void LLVMStyle::printRelRelaReloc(const ELFO *Obj, unsigned SecIndex, } } -template -void LLVMStyle::printSectionHeaders(const ELFO *Obj) { +template void LLVMStyle::printSectionHeaders() { ListScope SectionsD(W, "Sections"); int SectionIndex = -1; std::vector> FlagsList = - getSectionFlagsForTarget(Obj->getHeader()->e_machine); - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { + getSectionFlagsForTarget(this->Obj.getHeader()->e_machine); + for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) { DictScope SectionD(W, "Section"); W.printNumber("Index", ++SectionIndex); - W.printNumber("Name", this->getPrintableSectionName(Obj, Sec), Sec.sh_name); - W.printHex( - "Type", - object::getELFSectionTypeName(Obj->getHeader()->e_machine, Sec.sh_type), - Sec.sh_type); + W.printNumber("Name", this->getPrintableSectionName(Sec), Sec.sh_name); + W.printHex("Type", + object::getELFSectionTypeName(this->Obj.getHeader()->e_machine, + Sec.sh_type), + Sec.sh_type); W.printFlags("Flags", Sec.sh_flags, makeArrayRef(FlagsList)); W.printHex("Address", Sec.sh_addr); W.printHex("Offset", Sec.sh_offset); @@ -6288,32 +6231,33 @@ void LLVMStyle::printSectionHeaders(const ELFO *Obj) { if (opts::SectionRelocations) { ListScope D(W, "Relocations"); - this->printRelocationsHelper(Obj, Sec); + this->printRelocationsHelper(Sec); } if (opts::SectionSymbols) { ListScope D(W, "Symbols"); if (const Elf_Shdr *Symtab = this->dumper()->getDotSymtabSec()) { StringRef StrTable = unwrapOrError( - this->FileName, Obj->getStringTableForSymtab(*Symtab)); + this->FileName, this->Obj.getStringTableForSymtab(*Symtab)); for (const Elf_Sym &Sym : - unwrapOrError(this->FileName, Obj->symbols(Symtab))) { - const Elf_Shdr *SymSec = unwrapOrError( - this->FileName, - 
Obj->getSection(&Sym, Symtab, this->dumper()->getShndxTable())); + unwrapOrError(this->FileName, this->Obj.symbols(Symtab))) { + const Elf_Shdr *SymSec = + unwrapOrError(this->FileName, + this->Obj.getSection( + &Sym, Symtab, this->dumper()->getShndxTable())); if (SymSec == &Sec) - printSymbol( - Obj, &Sym, - unwrapOrError(this->FileName, Obj->symbols(Symtab)).begin(), - StrTable, false, false); + printSymbol(&Sym, + unwrapOrError(this->FileName, this->Obj.symbols(Symtab)) + .begin(), + StrTable, false, false); } } } if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) { ArrayRef Data = - unwrapOrError(this->FileName, Obj->getSectionContents(&Sec)); + unwrapOrError(this->FileName, this->Obj.getSectionContents(&Sec)); W.printBinaryBlock( "SectionData", StringRef(reinterpret_cast(Data.data()), Data.size())); @@ -6351,8 +6295,7 @@ void LLVMStyle::printSymbolSection(const Elf_Sym *Symbol, } template -void LLVMStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, - const Elf_Sym *First, +void LLVMStyle::printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, Optional StrTable, bool IsDynamic, bool /*NonVisibilityBitsUsed*/) { std::string FullSymbolName = @@ -6364,7 +6307,7 @@ void LLVMStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, W.printHex("Value", Symbol->st_value); W.printNumber("Size", Symbol->st_size); W.printEnum("Binding", Symbol->getBinding(), makeArrayRef(ElfSymbolBindings)); - if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU && + if (this->Obj.getHeader()->e_machine == ELF::EM_AMDGPU && SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) W.printEnum("Type", SymbolType, makeArrayRef(AMDGPUSymbolTypes)); else @@ -6376,7 +6319,7 @@ void LLVMStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, else { std::vector> SymOtherFlags(std::begin(ElfSymOtherFlags), std::end(ElfSymOtherFlags)); - if (Obj->getHeader()->e_machine == EM_MIPS) { + if (this->Obj.getHeader()->e_machine == EM_MIPS) { // Someones in their infinite wisdom decided 
to make STO_MIPS_MIPS16 // flag overlapped with other ST_MIPS_xxx flags. So consider both // cases separately. @@ -6395,33 +6338,32 @@ void LLVMStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, } template -void LLVMStyle::printSymbols(const ELFO *Obj, bool PrintSymbols, +void LLVMStyle::printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) { if (PrintSymbols) - printSymbols(Obj); + printSymbols(); if (PrintDynamicSymbols) - printDynamicSymbols(Obj); + printDynamicSymbols(); } -template void LLVMStyle::printSymbols(const ELFO *Obj) { +template void LLVMStyle::printSymbols() { ListScope Group(W, "Symbols"); this->dumper()->printSymbolsHelper(false); } -template -void LLVMStyle::printDynamicSymbols(const ELFO *Obj) { +template void LLVMStyle::printDynamicSymbols() { ListScope Group(W, "DynamicSymbols"); this->dumper()->printSymbolsHelper(true); } -template void LLVMStyle::printDynamic(const ELFFile *Obj) { +template void LLVMStyle::printDynamic() { Elf_Dyn_Range Table = this->dumper()->dynamic_table(); if (Table.empty()) return; W.startLine() << "DynamicSection [ (" << Table.size() << " entries)\n"; - size_t MaxTagSize = getMaxDynamicTagSize(Obj, Table); + size_t MaxTagSize = getMaxDynamicTagSize(this->Obj, Table); // The "Name/Value" column should be indented from the "Type" column by N // spaces, where N = MaxTagSize - length of "Type" (4) + trailing // space (1) = -3. @@ -6435,14 +6377,13 @@ template void LLVMStyle::printDynamic(const ELFFile *Ob W.startLine() << " " << format_hex(Tag, ELFT::Is64Bits ? 
18 : 10, true) << " " << format(ValueFmt.c_str(), - Obj->getDynamicTagAsString(Tag).c_str()) + this->Obj.getDynamicTagAsString(Tag).c_str()) << Value << "\n"; } W.startLine() << "]\n"; } -template -void LLVMStyle::printDynamicRelocations(const ELFO *Obj) { +template void LLVMStyle::printDynamicRelocations() { const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion(); const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion(); const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion(); @@ -6452,42 +6393,43 @@ void LLVMStyle::printDynamicRelocations(const ELFO *Obj) { W.indent(); if (DynRelaRegion.Size > 0) { for (const Elf_Rela &Rela : this->dumper()->dyn_relas()) - printDynamicRelocation(Obj, Rela); + printDynamicRelocation(Rela); } if (DynRelRegion.Size > 0) { for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) - printDynamicRelocation(Obj, Rel); + printDynamicRelocation(Rel); } if (DynRelrRegion.Size > 0) { Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - for (const Elf_Rel &R : Obj->decode_relrs(Relrs)) - printDynamicRelocation(Obj, R); + for (const Elf_Rel &R : this->Obj.decode_relrs(Relrs)) + printDynamicRelocation(R); } if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Obj, Rela); + printDynamicRelocation(Rela); else for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Obj, Rel); + printDynamicRelocation(Rel); W.unindent(); W.startLine() << "}\n"; } template - template -void LLVMStyle::printDynamicRelocation(const ELFO *Obj, const RelTy& Rel) { +template +void LLVMStyle::printDynamicRelocation(const RelTy &Rel) { SmallString<32> RelocName; - Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName); + this->Obj.getRelocationTypeName(Rel.getType(this->Obj.isMips64EL()), + RelocName); std::string SymbolName = - getSymbolForReloc(Obj, this->FileName, this->dumper(), Rel).Name; + 
getSymbolForReloc(this->Obj, this->FileName, this->dumper(), Rel).Name; uintX_t Addend = getAddend(Rel).getValueOr(0); if (opts::ExpandRelocs) { DictScope Group(W, "Relocation"); W.printHex("Offset", Rel.r_offset); - W.printNumber("Type", RelocName, (int)Rel.getType(Obj->isMips64EL())); + W.printNumber("Type", RelocName, (int)Rel.getType(this->Obj.isMips64EL())); W.printString("Symbol", !SymbolName.empty() ? SymbolName : "-"); W.printHex("Addend", Addend); } else { @@ -6500,19 +6442,17 @@ void LLVMStyle::printDynamicRelocation(const ELFO *Obj, const RelTy& Rel) template void LLVMStyle::printProgramHeaders( - const ELFO *Obj, bool PrintProgramHeaders, - cl::boolOrDefault PrintSectionMapping) { + bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) { if (PrintProgramHeaders) - printProgramHeaders(Obj); + printProgramHeaders(); if (PrintSectionMapping == cl::BOU_TRUE) - printSectionMapping(Obj); + printSectionMapping(); } -template -void LLVMStyle::printProgramHeaders(const ELFO *Obj) { +template void LLVMStyle::printProgramHeaders() { ListScope L(W, "ProgramHeaders"); - Expected> PhdrsOrErr = Obj->program_headers(); + Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning(createError("unable to dump program headers: " + toString(PhdrsOrErr.takeError()))); @@ -6522,7 +6462,7 @@ void LLVMStyle::printProgramHeaders(const ELFO *Obj) { for (const Elf_Phdr &Phdr : *PhdrsOrErr) { DictScope P(W, "ProgramHeader"); StringRef Type = - segmentTypeToString(Obj->getHeader()->e_machine, Phdr.p_type); + segmentTypeToString(this->Obj.getHeader()->e_machine, Phdr.p_type); W.printHex("Type", Type.empty() ? 
"Unknown" : Type, Phdr.p_type); W.printHex("Offset", Phdr.p_offset); @@ -6536,8 +6476,7 @@ void LLVMStyle::printProgramHeaders(const ELFO *Obj) { } template -void LLVMStyle::printVersionSymbolSection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void LLVMStyle::printVersionSymbolSection(const Elf_Shdr *Sec) { ListScope SS(W, "VersionSymbols"); if (!Sec) return; @@ -6557,8 +6496,8 @@ void LLVMStyle::printVersionSymbolSection(const ELFFile *Obj, for (size_t I = 0, E = Syms.size(); I < E; ++I) { DictScope S(W, "Symbol"); W.printNumber("Version", (*VerTableOrErr)[I].vs_index & VERSYM_VERSION); - W.printString("Name", this->dumper()->getFullSymbolName( - &Syms[I], StrTable, /*IsDynamic=*/true)); + W.printString("Name", this->dumper()->getFullSymbolName(&Syms[I], StrTable, + /*IsDynamic=*/true)); } } @@ -6568,8 +6507,7 @@ static const EnumEntry SymVersionFlags[] = { {"Info", "INFO", VER_FLG_INFO}}; template -void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void LLVMStyle::printVersionDefinitionSection(const Elf_Shdr *Sec) { ListScope SD(W, "VersionDefinitions"); if (!Sec) return; @@ -6594,8 +6532,7 @@ void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, } template -void LLVMStyle::printVersionDependencySection(const ELFFile *Obj, - const Elf_Shdr *Sec) { +void LLVMStyle::printVersionDependencySection(const Elf_Shdr *Sec) { ListScope SD(W, "VersionRequirements"); if (!Sec) return; @@ -6624,19 +6561,17 @@ void LLVMStyle::printVersionDependencySection(const ELFFile *Obj, } } -template -void LLVMStyle::printHashHistograms(const ELFFile *Obj) { +template void LLVMStyle::printHashHistograms() { W.startLine() << "Hash Histogram not implemented!\n"; } -template -void LLVMStyle::printCGProfile(const ELFFile *Obj) { +template void LLVMStyle::printCGProfile() { ListScope L(W, "CGProfile"); if (!this->dumper()->getDotCGProfileSec()) return; Expected> CGProfileOrErr = - Obj->template getSectionContentsAsArray( + 
this->Obj.template getSectionContentsAsArray( this->dumper()->getDotCGProfileSec()); if (!CGProfileOrErr) { this->reportUniqueWarning( @@ -6670,14 +6605,13 @@ static Expected> toULEB128Array(ArrayRef Data) { return Ret; } -template -void LLVMStyle::printAddrsig(const ELFFile *Obj) { +template void LLVMStyle::printAddrsig() { ListScope L(W, "Addrsig"); if (!this->dumper()->getDotAddrsigSec()) return; ArrayRef Contents = unwrapOrError( this->FileName, - Obj->getSectionContents(this->dumper()->getDotAddrsigSec())); + this->Obj.getSectionContents(this->dumper()->getDotAddrsigSec())); Expected> V = toULEB128Array(Contents); if (!V) { reportWarning(V.takeError(), this->FileName); @@ -6737,8 +6671,7 @@ static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) { } } -template -void LLVMStyle::printNotes(const ELFFile *Obj) { +template void LLVMStyle::printNotes() { ListScope L(W, "Notes"); auto PrintHeader = [&](Optional SecName, @@ -6767,7 +6700,7 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { } else if (Name == "AMDGPU") { W.printString("Type", getAMDGPUNoteTypeName(Type)); } else { - StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE + StringRef NoteType = this->Obj.getHeader()->e_type == ELF::ET_CORE ? 
getCoreNoteTypeName(Type) : getGenericNoteTypeName(Type); if (!NoteType.empty()) @@ -6805,22 +6738,22 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { } }; - ArrayRef Sections = cantFail(Obj->sections()); - if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { + ArrayRef Sections = cantFail(this->Obj.sections()); + if (this->Obj.getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { for (const auto &S : Sections) { if (S.sh_type != SHT_NOTE) continue; DictScope D(W, "NoteSection"); - PrintHeader(expectedToOptional(Obj->getSectionName(&S)), S.sh_offset, + PrintHeader(expectedToOptional(this->Obj.getSectionName(&S)), S.sh_offset, S.sh_size); Error Err = Error::success(); - for (auto Note : Obj->notes(S, Err)) + for (auto Note : this->Obj.notes(S, Err)) ProcessNote(Note); if (Err) reportError(std::move(Err), this->FileName); } } else { - Expected> PhdrsOrErr = Obj->program_headers(); + Expected> PhdrsOrErr = this->Obj.program_headers(); if (!PhdrsOrErr) { this->reportUniqueWarning(createError( "unable to read program headers to locate the PT_NOTE segment: " + @@ -6834,7 +6767,7 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { DictScope D(W, "NoteSection"); PrintHeader(/*SecName=*/None, P.p_offset, P.p_filesz); Error Err = Error::success(); - for (auto Note : Obj->notes(P, Err)) + for (auto Note : this->Obj.notes(P, Err)) ProcessNote(Note); if (Err) reportError(std::move(Err), this->FileName); @@ -6842,17 +6775,17 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { } } -template -void LLVMStyle::printELFLinkerOptions(const ELFFile *Obj) { +template void LLVMStyle::printELFLinkerOptions() { ListScope L(W, "LinkerOptions"); unsigned I = -1; - for (const Elf_Shdr &Shdr : cantFail(Obj->sections())) { + for (const Elf_Shdr &Shdr : cantFail(this->Obj.sections())) { ++I; if (Shdr.sh_type != ELF::SHT_LLVM_LINKER_OPTIONS) continue; - Expected> ContentsOrErr = Obj->getSectionContents(&Shdr); + Expected> ContentsOrErr = + 
this->Obj.getSectionContents(&Shdr); if (!ContentsOrErr) { this->reportUniqueWarning( createError("unable to read the content of the " @@ -6887,11 +6820,10 @@ void LLVMStyle::printELFLinkerOptions(const ELFFile *Obj) { } } -template -void LLVMStyle::printDependentLibs(const ELFFile *Obj) { +template void LLVMStyle::printDependentLibs() { ListScope L(W, "DependentLibs"); this->printDependentLibsHelper( - Obj, [](const Elf_Shdr &) {}, + [](const Elf_Shdr &) {}, [this](StringRef Lib, uint64_t) { W.printString(Lib); }); } From 3542feeb2077f267bff1ab98fb4bf20099f44bb8 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 3 Sep 2020 10:17:09 +0100 Subject: [PATCH 089/465] [SCCP] Do not replace deref'able ptr with un-deref'able one. Currently IPSCCP (and others like CVP/GVN) blindly propagate pointer equalities. In certain cases, that leads to dereferenceable pointers being replaced, as in the example test case. I think this is not allowed, as it introduces an access of an un-dereferenceable pointer. Note that the pointer is inbounds, but one past the last element, so it is valid, but not dereferenceable. This patch is mostly to highlight the issue and start a discussion. Currently it only checks for specifically looking one-past-the-last-element pointers with array typed bases. This causes the mis-compile outlined in https://stackoverflow.com/questions/55754313/is-this-gcc-clang-past-one-pointer-comparison-behavior-conforming-or-non-standar In the test case, if we replace %p with the GEP for the store, we subsequently determine that the store and the load cannot alias, because they are to different underlying objects. 
Note that Alive2 seems to think that the replacement is valid: https://alive2.llvm.org/ce/z/2rorhk Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D85332 --- llvm/lib/Transforms/Scalar/SCCP.cpp | 11 +++++++++++ llvm/test/Transforms/SCCP/apint-bigint2.ll | 6 ++++-- llvm/test/Transforms/SCCP/indirectbr.ll | 4 +++- ...lace-dereferenceable-ptr-with-undereferenceable.ll | 4 ++-- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 2afc778ed8214..0035ae288ebbe 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueLattice.h" #include "llvm/Analysis/ValueLatticeUtils.h" @@ -177,6 +178,8 @@ class SCCPSolver : public InstVisitor { LLVMContext &Ctx; public: + const DataLayout &getDataLayout() const { return DL; } + void addAnalysis(Function &F, AnalysisResultsForFn A) { AnalysisResults.insert({&F, std::move(A)}); } @@ -1630,6 +1633,14 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { return false; } + // Do not propagate equality of a un-dereferenceable pointer. + // FIXME: Currently this only treats pointers one past the last element + // for array types. Should probably be much stricter. + if (Const->getType()->isPointerTy() && + !canReplacePointersIfEqual(V, Const, Solver.getDataLayout(), + dyn_cast(V))) + return false; + LLVM_DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n'); // Replaces all of the uses of a variable with uses of the constant. 
diff --git a/llvm/test/Transforms/SCCP/apint-bigint2.ll b/llvm/test/Transforms/SCCP/apint-bigint2.ll index 8effa2181a4c2..7d1a9a68372c7 100644 --- a/llvm/test/Transforms/SCCP/apint-bigint2.ll +++ b/llvm/test/Transforms/SCCP/apint-bigint2.ll @@ -51,8 +51,10 @@ define i101 @large_aggregate_2() { } ; CHECK-LABEL: @index_too_large -; CHECK-NEXT: store i101* getelementptr (i101, i101* getelementptr ([6 x i101], [6 x i101]* @Y, i32 0, i32 -1), i101 9224497936761618431), i101** undef -; CHECK-NEXT: ret void +; CHECK-NEXT: %ptr1 = getelementptr [6 x i101], [6 x i101]* @Y, i32 0, i32 -1 +; CHECK-NEXT: %ptr2 = getelementptr i101, i101* %ptr1, i101 9224497936761618431 +; CHECK-NEXT: store i101* %ptr2, i101** undef +; CHECK-NEXT: ret void define void @index_too_large() { %ptr1 = getelementptr [6 x i101], [6 x i101]* @Y, i32 0, i32 -1 %ptr2 = getelementptr i101, i101* %ptr1, i101 9224497936761618431 diff --git a/llvm/test/Transforms/SCCP/indirectbr.ll b/llvm/test/Transforms/SCCP/indirectbr.ll index 6889282e3874f..1a9ae8a128f18 100644 --- a/llvm/test/Transforms/SCCP/indirectbr.ll +++ b/llvm/test/Transforms/SCCP/indirectbr.ll @@ -31,7 +31,9 @@ BB1: define void @indbrtest2() { ; CHECK-LABEL: @indbrtest2( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK-NEXT: [[B:%.*]] = inttoptr i64 ptrtoint (i8* blockaddress(@indbrtest2, [[BB1:%.*]]) to i64) to i8* +; CHECK-NEXT: [[C:%.*]] = bitcast i8* [[B]] to i8* +; CHECK-NEXT: br label [[BB1]] ; CHECK: BB1: ; CHECK-NEXT: call void @BB1_f() ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll b/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll index 5857ce2d30b72..639e9ee760427 100644 --- a/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll +++ b/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll @@ -11,7 +11,7 @@ define i32 @eq_undereferenceable(i32* %p) { ; CHECK-NEXT: 
[[CMP:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: store i32 2, i32* getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1), align 4 +; CHECK-NEXT: store i32 2, i32* [[P]], align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 @@ -65,7 +65,7 @@ define i1 @eq_undereferenceable_cmp_simp(i32* %p) { ; CHECK-NEXT: [[CMP_0:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) ; CHECK-NEXT: br i1 [[CMP_0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: store i32 2, i32* getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1), align 4 +; CHECK-NEXT: store i32 2, i32* [[P]], align 4 ; CHECK-NEXT: ret i1 true ; CHECK: if.end: ; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i32* [[P]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) From 4c5e4aa89b11ec3253258b8df5125833773d1b1e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 3 Sep 2020 10:28:42 +0100 Subject: [PATCH 090/465] Revert "[SCCP] Do not replace deref'able ptr with un-deref'able one." This reverts commit 3542feeb2077f267bff1ab98fb4bf20099f44bb8. 
This seems to be causing issues with a sanitizer build http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap-msan/builds/21677 --- llvm/lib/Transforms/Scalar/SCCP.cpp | 11 ----------- llvm/test/Transforms/SCCP/apint-bigint2.ll | 6 ++---- llvm/test/Transforms/SCCP/indirectbr.ll | 4 +--- ...lace-dereferenceable-ptr-with-undereferenceable.ll | 4 ++-- 4 files changed, 5 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 0035ae288ebbe..2afc778ed8214 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -31,7 +31,6 @@ #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueLattice.h" #include "llvm/Analysis/ValueLatticeUtils.h" @@ -178,8 +177,6 @@ class SCCPSolver : public InstVisitor { LLVMContext &Ctx; public: - const DataLayout &getDataLayout() const { return DL; } - void addAnalysis(Function &F, AnalysisResultsForFn A) { AnalysisResults.insert({&F, std::move(A)}); } @@ -1633,14 +1630,6 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { return false; } - // Do not propagate equality of a un-dereferenceable pointer. - // FIXME: Currently this only treats pointers one past the last element - // for array types. Should probably be much stricter. - if (Const->getType()->isPointerTy() && - !canReplacePointersIfEqual(V, Const, Solver.getDataLayout(), - dyn_cast(V))) - return false; - LLVM_DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n'); // Replaces all of the uses of a variable with uses of the constant. 
diff --git a/llvm/test/Transforms/SCCP/apint-bigint2.ll b/llvm/test/Transforms/SCCP/apint-bigint2.ll index 7d1a9a68372c7..8effa2181a4c2 100644 --- a/llvm/test/Transforms/SCCP/apint-bigint2.ll +++ b/llvm/test/Transforms/SCCP/apint-bigint2.ll @@ -51,10 +51,8 @@ define i101 @large_aggregate_2() { } ; CHECK-LABEL: @index_too_large -; CHECK-NEXT: %ptr1 = getelementptr [6 x i101], [6 x i101]* @Y, i32 0, i32 -1 -; CHECK-NEXT: %ptr2 = getelementptr i101, i101* %ptr1, i101 9224497936761618431 -; CHECK-NEXT: store i101* %ptr2, i101** undef -; CHECK-NEXT: ret void +; CHECK-NEXT: store i101* getelementptr (i101, i101* getelementptr ([6 x i101], [6 x i101]* @Y, i32 0, i32 -1), i101 9224497936761618431), i101** undef +; CHECK-NEXT: ret void define void @index_too_large() { %ptr1 = getelementptr [6 x i101], [6 x i101]* @Y, i32 0, i32 -1 %ptr2 = getelementptr i101, i101* %ptr1, i101 9224497936761618431 diff --git a/llvm/test/Transforms/SCCP/indirectbr.ll b/llvm/test/Transforms/SCCP/indirectbr.ll index 1a9ae8a128f18..6889282e3874f 100644 --- a/llvm/test/Transforms/SCCP/indirectbr.ll +++ b/llvm/test/Transforms/SCCP/indirectbr.ll @@ -31,9 +31,7 @@ BB1: define void @indbrtest2() { ; CHECK-LABEL: @indbrtest2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B:%.*]] = inttoptr i64 ptrtoint (i8* blockaddress(@indbrtest2, [[BB1:%.*]]) to i64) to i8* -; CHECK-NEXT: [[C:%.*]] = bitcast i8* [[B]] to i8* -; CHECK-NEXT: br label [[BB1]] +; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: BB1: ; CHECK-NEXT: call void @BB1_f() ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll b/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll index 639e9ee760427..5857ce2d30b72 100644 --- a/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll +++ b/llvm/test/Transforms/SCCP/replace-dereferenceable-ptr-with-undereferenceable.ll @@ -11,7 +11,7 @@ define i32 @eq_undereferenceable(i32* %p) { ; CHECK-NEXT: 
[[CMP:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: store i32 2, i32* [[P]], align 4 +; CHECK-NEXT: store i32 2, i32* getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1), align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @y, i64 0, i64 0), align 4 @@ -65,7 +65,7 @@ define i1 @eq_undereferenceable_cmp_simp(i32* %p) { ; CHECK-NEXT: [[CMP_0:%.*]] = icmp eq i32* [[P:%.*]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) ; CHECK-NEXT: br i1 [[CMP_0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: store i32 2, i32* [[P]], align 4 +; CHECK-NEXT: store i32 2, i32* getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1), align 4 ; CHECK-NEXT: ret i1 true ; CHECK: if.end: ; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i32* [[P]], getelementptr inbounds (i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @x, i64 0, i64 0), i64 1) From f9091e56d34fc1a14fe4640b95a691d9ac7afcc4 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Thu, 3 Sep 2020 09:19:41 +0000 Subject: [PATCH 091/465] [clang][aarch64] Drop experimental from __ARM_FEATURE_SVE_BITS macro The __ARM_FEATURE_SVE_BITS feature macro is specified in the Arm C Language Extensions (ACLE) for SVE [1] (version 00bet5). From the spec, where __ARM_FEATURE_SVE_BITS==N: When N is nonzero, indicates that the implementation is generating code for an N-bit SVE target and that the arm_sve_vector_bits(N) attribute is available. 
This was defined in D83550 as __ARM_FEATURE_SVE_BITS_EXPERIMENTAL and enabled under the -msve-vector-bits flag to simplify initial tests. This patch drops _EXPERIMENTAL now there is support for the feature. [1] https://developer.arm.com/documentation/100987/latest Reviewed By: david-arm Differential Revision: https://reviews.llvm.org/D86720 --- clang/include/clang/Basic/AttrDocs.td | 4 ---- clang/lib/Basic/Targets/AArch64.cpp | 3 +-- .../attr-arm-sve-vector-bits-bitcast.c | 2 +- .../CodeGen/attr-arm-sve-vector-bits-call.c | 2 +- .../CodeGen/attr-arm-sve-vector-bits-cast.c | 2 +- .../attr-arm-sve-vector-bits-codegen.c | 2 +- .../attr-arm-sve-vector-bits-globals.c | 2 +- .../CodeGen/attr-arm-sve-vector-bits-types.c | 2 +- .../aarch64-mangle-sve-fixed-vectors.cpp | 2 +- .../CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp | 2 +- .../Preprocessor/aarch64-target-features.c | 24 +++++++++---------- clang/test/Sema/attr-arm-sve-vector-bits.c | 2 +- .../test/SemaCXX/attr-arm-sve-vector-bits.cpp | 2 +- 13 files changed, 22 insertions(+), 29 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 3a28cf2456566..d6d5567c7924e 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4944,10 +4944,6 @@ to the SVE predicate type ``svbool_t``, this excludes tuple types such as ``N==__ARM_FEATURE_SVE_BITS``, the implementation defined feature macro that is enabled under the ``-msve-vector-bits`` flag. -NOTE: This feature is currently WIP, the ``-msve-vector-bits=`` flag defines -the ``__ARM_FEATURE_SVE_BITS_EXPERIMENTAL`` macro. This feature is complete -when experimental is dropped. - For more information See `Arm C Language Extensions for SVE `_ for more information. 
}]; diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 6fd97d4e57869..7f0a0f0d86dc1 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -378,8 +378,7 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); if (Opts.ArmSveVectorBits) - Builder.defineMacro("__ARM_FEATURE_SVE_BITS_EXPERIMENTAL", - Twine(Opts.ArmSveVectorBits)); + Builder.defineMacro("__ARM_FEATURE_SVE_BITS", Twine(Opts.ArmSveVectorBits)); } ArrayRef AArch64TargetInfo::getTargetBuiltins() const { diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c index f6b8b1be1e76b..cab424c3dbe17 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c @@ -6,7 +6,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N))); typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c index 412923f1e898e..490ec92dfdeb5 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c @@ -4,7 +4,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c index 6c7edf9033f76..13d8f14f991a8 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -4,7 +4,7 @@ #include -#define N 
__ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c index d93be54a499cb..1a6a68a2e1f4f 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c @@ -3,7 +3,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c index be0b314334b9d..d567c718000c8 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c @@ -5,7 +5,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N))); typedef svbfloat16_t fixed_bfloat16_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c index 625e096bf3d6f..a1cfc514081ea 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c @@ -7,7 +7,7 @@ #include -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N))); typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp index cb001cd06e02e..12550396d0fe7 100644 
--- a/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp +++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp @@ -14,7 +14,7 @@ // RUN: -target-feature +sve -target-feature +bf16 -msve-vector-bits=2048 \ // RUN: | FileCheck %s --check-prefix=CHECK-2048 -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef __SVInt8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N))); typedef __SVInt16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N))); diff --git a/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp b/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp index 7308aa6ae7a45..e9e15d6e0c4e5 100644 --- a/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp +++ b/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp @@ -14,7 +14,7 @@ // RUN: -target-feature +sve -target-feature +bf16 -msve-vector-bits=2048 \ // RUN: | FileCheck %s --check-prefix=CHECK-2048 -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS namespace std { class type_info; diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index 905a77785a9d8..cb137eea072e6 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -44,12 +44,12 @@ // CHECK-NOT: __ARM_BF16_FORMAT_ALTERNATIVE 1 // CHECK-NOT: __ARM_FEATURE_BF16 1 // CHECK-NOT: __ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 0 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 128 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 256 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 512 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 1024 -// CHECK-NOT: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 2048 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 0 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 128 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 256 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 512 +// CHECK-NOT: __ARM_FEATURE_SVE_BITS 1024 
+// CHECK-NOT: __ARM_FEATURE_SVE_BITS 2048 // RUN: %clang -target aarch64_be-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-BIGENDIAN // CHECK-BIGENDIAN: __ARM_BIG_ENDIAN 1 @@ -444,10 +444,8 @@ // RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=1024 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-1024 %s // RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-2048 %s // RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS-2048 %s -// NOTE: The __ARM_FEATURE_SVE_BITS feature macro is experimental until the -// feature is complete. -// CHECK-SVE-VECTOR-BITS-128: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 128 -// CHECK-SVE-VECTOR-BITS-256: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 256 -// CHECK-SVE-VECTOR-BITS-512: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 512 -// CHECK-SVE-VECTOR-BITS-1024: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 1024 -// CHECK-SVE-VECTOR-BITS-2048: __ARM_FEATURE_SVE_BITS_EXPERIMENTAL 2048 +// CHECK-SVE-VECTOR-BITS-128: __ARM_FEATURE_SVE_BITS 128 +// CHECK-SVE-VECTOR-BITS-256: __ARM_FEATURE_SVE_BITS 256 +// CHECK-SVE-VECTOR-BITS-512: __ARM_FEATURE_SVE_BITS 512 +// CHECK-SVE-VECTOR-BITS-1024: __ARM_FEATURE_SVE_BITS 1024 +// CHECK-SVE-VECTOR-BITS-2048: __ARM_FEATURE_SVE_BITS 2048 diff --git a/clang/test/Sema/attr-arm-sve-vector-bits.c b/clang/test/Sema/attr-arm-sve-vector-bits.c index f143037fd6114..1bcbfa360c976 100644 --- a/clang/test/Sema/attr-arm-sve-vector-bits.c +++ b/clang/test/Sema/attr-arm-sve-vector-bits.c @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -msve-vector-bits=1024 -fallow-half-arguments-and-returns %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 
-fsyntax-only -verify -msve-vector-bits=2048 -fallow-half-arguments-and-returns %s -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef __SVInt8_t svint8_t; typedef __SVInt16_t svint16_t; diff --git a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp index c8ce257ad3265..ea7c4778db0ea 100644 --- a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp +++ b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns %s // expected-no-diagnostics -#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL +#define N __ARM_FEATURE_SVE_BITS typedef __SVInt8_t svint8_t; typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N))); From 485e6db8729383345da9796218d99843449057ac Mon Sep 17 00:00:00 2001 From: OCHyams Date: Thu, 3 Sep 2020 08:28:15 +0100 Subject: [PATCH 092/465] Revert "Adding GDB PrettyPrinter for mlir::Identifier." This reverts commit 9e9e6e698d8ef5dc5b7576058f2022aab2534a52. This commit is causing builds that include the 'debuginfo-tests' project to fail. 
Apple has a public bot which shows the failure: http://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/23667/console --- debuginfo-tests/CMakeLists.txt | 7 ------ debuginfo-tests/lit.cfg.py | 1 - debuginfo-tests/lit.site.cfg.py.in | 2 -- .../llvm-prettyprinters/gdb/mlir-support.cpp | 8 ------- .../llvm-prettyprinters/gdb/mlir-support.gdb | 8 ------- mlir/utils/gdb-scripts/prettyprinters.py | 22 ------------------- 6 files changed, 48 deletions(-) delete mode 100644 debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp delete mode 100644 debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb delete mode 100644 mlir/utils/gdb-scripts/prettyprinters.py diff --git a/debuginfo-tests/CMakeLists.txt b/debuginfo-tests/CMakeLists.txt index 3750f2e0b026b..d3ac0a4aad90a 100644 --- a/debuginfo-tests/CMakeLists.txt +++ b/debuginfo-tests/CMakeLists.txt @@ -9,12 +9,6 @@ add_llvm_executable(check-gdb-llvm-support ) target_link_libraries(check-gdb-llvm-support PRIVATE LLVMSupport) -add_llvm_executable(check-gdb-mlir-support - llvm-prettyprinters/gdb/mlir-support.cpp -) -target_include_directories(check-gdb-mlir-support PRIVATE ${LLVM_EXTERNAL_MLIR_SOURCE_DIR}/include) -target_link_libraries(check-gdb-mlir-support PRIVATE MLIRIR) - set(DEBUGINFO_TESTS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(DEBUGINFO_TESTS_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) @@ -25,7 +19,6 @@ set(DEBUGINFO_TEST_DEPS llvm-config llvm-objdump check-gdb-llvm-support - check-gdb-mlir-support not ) diff --git a/debuginfo-tests/lit.cfg.py b/debuginfo-tests/lit.cfg.py index ba09b1f4cfae4..4c45b723d2e9a 100644 --- a/debuginfo-tests/lit.cfg.py +++ b/debuginfo-tests/lit.cfg.py @@ -46,7 +46,6 @@ config.debuginfo_tests_src_root, 'llgdb-tests', 'test_debuginfo.pl')), ToolSubst("%llvm_src_root", config.llvm_src_root), ToolSubst("%llvm_tools_dir", config.llvm_tools_dir), - ToolSubst("%mlir_src_root", config.mlir_src_root), ] def get_required_attr(config, attr_name): diff --git a/debuginfo-tests/lit.site.cfg.py.in 
b/debuginfo-tests/lit.site.cfg.py.in index 4ed49b83bc35f..d5893f577aed0 100644 --- a/debuginfo-tests/lit.site.cfg.py.in +++ b/debuginfo-tests/lit.site.cfg.py.in @@ -20,8 +20,6 @@ config.target_triple = "@TARGET_TRIPLE@" config.host_arch = "@HOST_ARCH@" config.is_msvc = lit.util.pythonize_bool("@MSVC@") -config.mlir_src_root = "@MLIR_SOURCE_DIR@" - config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.python3_executable = "@Python3_EXECUTABLE@" diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp b/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp deleted file mode 100644 index d38fc3d03ad30..0000000000000 --- a/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include "mlir/IR/Identifier.h" -#include "mlir/IR/MLIRContext.h" - -mlir::MLIRContext Context; - -auto Identifier = mlir::Identifier::get("foo", &Context); - -int main() { return 0; } diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb b/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb deleted file mode 100644 index cca435e640e09..0000000000000 --- a/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb +++ /dev/null @@ -1,8 +0,0 @@ -# RUN: gdb -q -batch -n -iex 'source %mlir_src_root/utils/gdb-scripts/prettyprinters.py' -x %s %llvm_tools_dir/check-gdb-mlir-support | FileCheck %s --dump-input=fail -# REQUIRES: debug-info - -break main -run - -# CHECK: "foo" -p Identifier diff --git a/mlir/utils/gdb-scripts/prettyprinters.py b/mlir/utils/gdb-scripts/prettyprinters.py deleted file mode 100644 index 5ea20aca83d88..0000000000000 --- a/mlir/utils/gdb-scripts/prettyprinters.py +++ /dev/null @@ -1,22 +0,0 @@ -"""GDB pretty printers for MLIR types.""" - -import gdb.printing - -class IdentifierPrinter: - """Prints an mlir::Identifier instance.""" - - def __init__(self, val): - self.entry = val['entry'] - - def to_string(self): - ptr = (self.entry + 1).cast(gdb.lookup_type('char').pointer()); - return 
ptr.string(length=self.entry['keyLength']) - - def display_hint(self): - return 'string' - -pp = gdb.printing.RegexpCollectionPrettyPrinter('MLIRSupport') - -pp.add_printer('mlir::Identifier', '^mlir::Identifier$', IdentifierPrinter) - -gdb.printing.register_pretty_printer(gdb.current_objfile(), pp) From 56ee83e36cf8067d85fc74855e2e45cb4b17195b Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 1 Sep 2020 15:24:14 +0300 Subject: [PATCH 093/465] [llvm-readobj/elf] - Improve warning messages, reported for .stack_sizes sections. Instead of referring to stack sizes sections only by name, we can add section indexes and types to warnings reported. Differential revision: https://reviews.llvm.org/D86934 --- .../tools/llvm-readobj/ELF/stack-sizes.test | 11 +++-- llvm/tools/llvm-readobj/ELFDumper.cpp | 45 +++++++++---------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/stack-sizes.test b/llvm/test/tools/llvm-readobj/ELF/stack-sizes.test index 0db833de45ae3..c0fa2c8029347 100644 --- a/llvm/test/tools/llvm-readobj/ELF/stack-sizes.test +++ b/llvm/test/tools/llvm-readobj/ELF/stack-sizes.test @@ -186,7 +186,7 @@ Symbols: # SHORT-GNU: Stack Sizes: # SHORT-GNU-NEXT: Size Function # SHORT-GNU-NEXT: 8 foo -# SHORT-GNU-NEXT: warning: '[[FILE]]': found invalid relocation offset (0x1) into section .stack_sizes while trying to extract a stack size entry +# SHORT-GNU-NEXT: warning: '[[FILE]]': found invalid relocation offset (0x1) into SHT_PROGBITS section with index 2 while trying to extract a stack size entry # SHORT-GNU-NEXT: 8 foo # SHORT-LLVM: StackSizes [ @@ -194,7 +194,7 @@ Symbols: # SHORT-LLVM-NEXT: Function: foo # SHORT-LLVM-NEXT: Size: 0x8 # SHORT-LLVM-NEXT: } -# SHORT-LLVM-NEXT: warning: '[[FILE]]': found invalid relocation offset (0x1) into section .stack_sizes while trying to extract a stack size entry +# SHORT-LLVM-NEXT: warning: '[[FILE]]': found invalid relocation offset (0x1) into SHT_PROGBITS section with index 2 
while trying to extract a stack size entry # SHORT-LLVM-NEXT: Entry { # SHORT-LLVM-NEXT: Function: foo # SHORT-LLVM-NEXT: Size: 0x8 @@ -361,9 +361,8 @@ Symbols: # RUN: llvm-readelf --stack-sizes %t06 2>&1 | FileCheck %s --check-prefix=BADSIZE -DFILE=%t06 # RUN: llvm-readobj --stack-sizes %t06 2>&1 | FileCheck %s --check-prefix=BADSIZE -DFILE=%t06 -## TODO: these messages should be improved to include section indices. -# BADSIZE: warning: '[[FILE]]': could not extract a valid stack size in section .stack_sizes -# BADSIZE: warning: '[[FILE]]': could not extract a valid stack size in section .stack_sizes +# BADSIZE: warning: '[[FILE]]': could not extract a valid stack size in SHT_PROGBITS section with index 2 +# BADSIZE: warning: '[[FILE]]': could not extract a valid stack size in SHT_PROGBITS section with index 3 --- !ELF FileHeader: @@ -460,7 +459,7 @@ Symbols: # NORELOCSECTION-OUT-LLVM: StackSizes [ # NORELOCSECTION-OUT-LLVM-NEXT: ] -# NORELOCSECTION-ERR: warning: '[[FILE]]': section .stack_sizes does not have a corresponding relocation section +# NORELOCSECTION-ERR: warning: '[[FILE]]': .stack_sizes (SHT_PROGBITS section with index 2) does not have a corresponding relocation section --- !ELF FileHeader: diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index dfcbb00bf2340..d6f5aac2cab25 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -731,11 +731,10 @@ template class DumpStyle { std::function PrintHeader); void printFunctionStackSize(const ELFObjectFile *Obj, uint64_t SymValue, Optional FunctionSec, - const StringRef SectionName, DataExtractor Data, + const Elf_Shdr &StackSizeSec, DataExtractor Data, uint64_t *Offset); void printStackSize(const ELFObjectFile *Obj, RelocationRef Rel, - SectionRef FunctionSec, - const StringRef &StackSizeSectionName, + SectionRef FunctionSec, const Elf_Shdr &StackSizeSec, const RelocationResolver &Resolver, DataExtractor Data); virtual void 
printStackSizeEntry(uint64_t Size, StringRef FuncName) = 0; virtual void printMipsGOT(const MipsGOTParser &Parser) = 0; @@ -5596,7 +5595,7 @@ template void DumpStyle::printFunctionStackSize(const ELFObjectFile *Obj, uint64_t SymValue, Optional FunctionSec, - const StringRef SectionName, + const Elf_Shdr &StackSizeSec, DataExtractor Data, uint64_t *Offset) { // This function ignores potentially erroneous input, unless it is directly @@ -5641,8 +5640,8 @@ void DumpStyle::printFunctionStackSize(const ELFObjectFile *Obj, if (*Offset == PrevOffset) { reportWarning( createStringError(object_error::parse_failed, - "could not extract a valid stack size in section %s", - SectionName.data()), + "could not extract a valid stack size in " + + describe(*Obj->getELFFile(), StackSizeSec)), Obj->getFileName()); return; } @@ -5662,7 +5661,7 @@ template void DumpStyle::printStackSize(const ELFObjectFile *Obj, RelocationRef Reloc, SectionRef FunctionSec, - const StringRef &StackSizeSectionName, + const Elf_Shdr &StackSizeSec, const RelocationResolver &Resolver, DataExtractor Data) { // This function ignores potentially erroneous input, unless it is directly @@ -5703,15 +5702,15 @@ void DumpStyle::printStackSize(const ELFObjectFile *Obj, reportUniqueWarning(createStringError( object_error::parse_failed, "found invalid relocation offset (0x" + Twine::utohexstr(Offset) + - ") into section " + StackSizeSectionName + + ") into " + describe(*Obj->getELFFile(), StackSizeSec) + " while trying to extract a stack size entry")); return; } uint64_t Addend = Data.getAddress(&Offset); uint64_t SymValue = Resolver(Reloc, RelocSymValue, Addend); - this->printFunctionStackSize(Obj, SymValue, FunctionSec, StackSizeSectionName, - Data, &Offset); + this->printFunctionStackSize(Obj, SymValue, FunctionSec, StackSizeSec, Data, + &Offset); } template @@ -5721,8 +5720,7 @@ void DumpStyle::printNonRelocatableStackSizes( // related to stack size reporting. 
const ELFFile *EF = Obj->getELFFile(); for (const SectionRef &Sec : Obj->sections()) { - StringRef SectionName = getSectionName(Sec); - if (SectionName != ".stack_sizes") + if (getSectionName(Sec) != ".stack_sizes") continue; PrintHeader(); const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl()); @@ -5741,8 +5739,8 @@ void DumpStyle::printNonRelocatableStackSizes( break; } uint64_t SymValue = Data.getAddress(&Offset); - printFunctionStackSize(Obj, SymValue, /*FunctionSec=*/None, SectionName, - Data, &Offset); + printFunctionStackSize(Obj, SymValue, /*FunctionSec=*/None, *ElfSec, Data, + &Offset); } } } @@ -5807,22 +5805,23 @@ void DumpStyle::printRelocatableStackSizes( PrintHeader(); const SectionRef &StackSizesSec = StackSizeMapEntry.first; const SectionRef &RelocSec = StackSizeMapEntry.second; + const Elf_Shdr *StackSizesELFSec = + Obj->getSection(StackSizesSec.getRawDataRefImpl()); // Warn about stack size sections without a relocation section. - StringRef StackSizeSectionName = getSectionName(StackSizesSec); if (RelocSec == NullSection) { - reportWarning(createError("section " + StackSizeSectionName + - " does not have a corresponding " - "relocation section"), - Obj->getFileName()); + reportWarning( + createError(".stack_sizes (" + + describe(*Obj->getELFFile(), *StackSizesELFSec) + + ") does not have a corresponding " + "relocation section"), + Obj->getFileName()); continue; } // A .stack_sizes section header's sh_link field is supposed to point // to the section that contains the functions whose stack sizes are // described in it. 
- const Elf_Shdr *StackSizesELFSec = - Obj->getSection(StackSizesSec.getRawDataRefImpl()); const SectionRef FunctionSec = Obj->toSectionRef(unwrapOrError( this->FileName, EF->getSection(StackSizesELFSec->sh_link))); @@ -5844,8 +5843,8 @@ void DumpStyle::printRelocatableStackSizes( ": " + EF->getRelocationTypeName(Reloc.getType()))); continue; } - this->printStackSize(Obj, Reloc, FunctionSec, StackSizeSectionName, - Resolver, Data); + this->printStackSize(Obj, Reloc, FunctionSec, *StackSizesELFSec, Resolver, + Data); } } } From 269984e772dd2aa59c5e12306ea05d4f3a77aa92 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 2 Sep 2020 13:16:42 +0300 Subject: [PATCH 094/465] [llvm-readelf] - Move a bit of common code to printDynamicRelocHeader(). NFC. This helps to isolate printing of the relocation's summary header in a single place. Differential revision: https://reviews.llvm.org/D87042 --- llvm/tools/llvm-readobj/ELFDumper.cpp | 55 +++++++++++---------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index d6f5aac2cab25..b81213d253d5c 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -120,11 +120,11 @@ template class DumpStyle; /// order (DT_REL, DT_RELSZ, DT_RELENT for example). struct DynRegionInfo { DynRegionInfo(StringRef ObjName) : FileName(ObjName) {} - DynRegionInfo(const void *A, uint64_t S, uint64_t ES, StringRef ObjName) + DynRegionInfo(const uint8_t *A, uint64_t S, uint64_t ES, StringRef ObjName) : Addr(A), Size(S), EntSize(ES), FileName(ObjName) {} /// Address in current address space. - const void *Addr = nullptr; + const uint8_t *Addr = nullptr; /// Size in bytes of the region. uint64_t Size = 0; /// Size of each entity in the region. 
@@ -863,8 +863,6 @@ template class GNUStyle : public DumpStyle { } void printHashedSymbol(const Elf_Sym *FirstSym, uint32_t Sym, StringRef StrTable, uint32_t Bucket); - void printRelocHeader(unsigned SType); - void printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, const Elf_Rel &R, unsigned RelIndex) override; void printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, @@ -3682,7 +3680,8 @@ void GNUStyle::printRelRelaReloc(const Elf_Sym *Sym, StringRef SymbolName, OS << Addend << "\n"; } -template void GNUStyle::printRelocHeader(unsigned SType) { +template +static void printRelocHeaderFields(formatted_raw_ostream &OS, unsigned SType) { bool IsRela = SType == ELF::SHT_RELA || SType == ELF::SHT_ANDROID_RELA; bool IsRelr = SType == ELF::SHT_RELR || SType == ELF::SHT_ANDROID_RELR; if (ELFT::Is64Bits) @@ -3703,6 +3702,16 @@ template void GNUStyle::printRelocHeader(unsigned SType) { OS << "\n"; } +template +static void printDynamicRelocHeader(const ELFFile &Obj, + formatted_raw_ostream &OS, unsigned Type, + StringRef Name, const DynRegionInfo &Reg) { + uint64_t Offset = Reg.Addr - Obj.base(); + OS << "\n'" << Name.str().c_str() << "' relocation section at offset 0x" + << to_hexString(Offset, false) << " contains " << Reg.Size << " bytes:\n"; + printRelocHeaderFields(OS, Type); +} + template static bool isRelocationSec(const typename ELFT::Shdr &Sec) { return Sec.sh_type == ELF::SHT_REL || Sec.sh_type == ELF::SHT_RELA || @@ -3754,7 +3763,7 @@ template void GNUStyle::printRelocations() { OS << "\nRelocation section '" << Name << "' at offset 0x" << to_hexString(Offset, false) << " contains " << EntriesNum << " entries:\n"; - printRelocHeader(Sec.sh_type); + printRelocHeaderFields(OS, Sec.sh_type); this->printRelocationsHelper(Sec); } if (!HasRelocSections) @@ -4436,49 +4445,31 @@ template void GNUStyle::printDynamicRelocations() { const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion(); const DynRegionInfo &DynPLTRelRegion = 
this->dumper()->getDynPLTRelRegion(); if (DynRelaRegion.Size > 0) { - OS << "\n'RELA' relocation section at offset " - << format_hex(reinterpret_cast(DynRelaRegion.Addr) - - this->Obj.base(), - 1) - << " contains " << DynRelaRegion.Size << " bytes:\n"; - printRelocHeader(ELF::SHT_RELA); + printDynamicRelocHeader(this->Obj, OS, ELF::SHT_RELA, "RELA", + DynRelaRegion); for (const Elf_Rela &Rela : this->dumper()->dyn_relas()) printDynamicRelocation(Rela); } if (DynRelRegion.Size > 0) { - OS << "\n'REL' relocation section at offset " - << format_hex(reinterpret_cast(DynRelRegion.Addr) - - this->Obj.base(), - 1) - << " contains " << DynRelRegion.Size << " bytes:\n"; - printRelocHeader(ELF::SHT_REL); + printDynamicRelocHeader(this->Obj, OS, ELF::SHT_REL, "REL", DynRelRegion); for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) printDynamicRelocation(Rel); } if (DynRelrRegion.Size > 0) { - OS << "\n'RELR' relocation section at offset " - << format_hex(reinterpret_cast(DynRelrRegion.Addr) - - this->Obj.base(), - 1) - << " contains " << DynRelrRegion.Size << " bytes:\n"; - printRelocHeader(ELF::SHT_REL); + printDynamicRelocHeader(this->Obj, OS, ELF::SHT_REL, "RELR", DynRelrRegion); Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); for (const Elf_Rel &R : this->Obj.decode_relrs(Relrs)) printDynamicRelocation(R); } if (DynPLTRelRegion.Size) { - OS << "\n'PLT' relocation section at offset " - << format_hex(reinterpret_cast(DynPLTRelRegion.Addr) - - this->Obj.base(), - 1) - << " contains " << DynPLTRelRegion.Size << " bytes:\n"; - if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) { - printRelocHeader(ELF::SHT_RELA); + printDynamicRelocHeader(this->Obj, OS, ELF::SHT_RELA, "PLT", + DynPLTRelRegion); for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef()) printDynamicRelocation(Rela); } else { - printRelocHeader(ELF::SHT_REL); + printDynamicRelocHeader(this->Obj, OS, ELF::SHT_REL, "PLT", + DynPLTRelRegion); for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) 
printDynamicRelocation(Rel); } From f4f29b956c628dc5029345c97d11c8d198657f87 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Wed, 2 Sep 2020 15:23:55 -0500 Subject: [PATCH 095/465] [PowerPC] Fix missing TLS symbol type. Previous implementations for the TLS models General Dynamic and Initial Exec were missing the ELF::STT_TLS type on symbols that required the type. This patch adds the type. Reviewed By: sfertile, MaskRay Differential Revision: https://reviews.llvm.org/D86777 --- llvm/lib/MC/MCELFStreamer.cpp | 3 +++ llvm/test/CodeGen/PowerPC/pcrel-tls-general-dynamic.ll | 8 ++++++-- llvm/test/CodeGen/PowerPC/pcrel-tls-initial-exec.ll | 8 ++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index 49d863f258bf9..5296e2400765f 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -433,15 +433,18 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO: case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HI: case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA: + case MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL: case MCSymbolRefExpr::VK_PPC_GOT_DTPREL: case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO: case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HI: case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HA: case MCSymbolRefExpr::VK_PPC_TLS: + case MCSymbolRefExpr::VK_PPC_TLS_PCREL: case MCSymbolRefExpr::VK_PPC_GOT_TLSGD: case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO: case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HI: case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA: + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_PCREL: case MCSymbolRefExpr::VK_PPC_TLSGD: case MCSymbolRefExpr::VK_PPC_GOT_TLSLD: case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO: diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tls-general-dynamic.ll b/llvm/test/CodeGen/PowerPC/pcrel-tls-general-dynamic.ll index d58e8c21509a6..dfbcbb3b338be 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-tls-general-dynamic.ll +++ 
b/llvm/test/CodeGen/PowerPC/pcrel-tls-general-dynamic.ll @@ -3,8 +3,9 @@ ; RUN: -enable-ppc-pcrel-tls < %s | FileCheck %s --check-prefix=CHECK-S ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: --relocation-model=pic -mcpu=pwr10 -ppc-asm-full-reg-names \ -; RUN: -enable-ppc-pcrel-tls --filetype=obj < %s | \ -; RUN: llvm-objdump --mcpu=pwr10 -dr - | FileCheck %s --check-prefix=CHECK-O +; RUN: -enable-ppc-pcrel-tls --filetype=obj -o %t.o < %s +; RUN: llvm-objdump --mcpu=pwr10 -dr %t.o |FileCheck %s --check-prefix=CHECK-O +; RUN: llvm-readelf -s %t.o | FileCheck %s --check-prefix=CHECK-SYM ; These test cases are to ensure that when using pc relative memory operations ; ABI correct code and relocations are produced for General Dynamic TLS Model. @@ -45,6 +46,9 @@ define i32 @GeneralDynamicValueLoad() { ; CHECK-O-NEXT: 0000000000000054: R_PPC64_TLSGD x ; CHECK-O-NEXT: 0000000000000054: R_PPC64_REL24_NOTOC __tls_get_addr ; CHECK-O-NEXT: 58: 00 00 63 80 lwz 3, 0(3) + + ; CHECK-SYM-LABEL: Symbol table '.symtab' contains 7 entries + ; CHECK-SYM: 6: 0000000000000000 0 TLS GLOBAL DEFAULT UND x entry: %0 = load i32, i32* @x, align 4 ret i32 %0 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tls-initial-exec.ll b/llvm/test/CodeGen/PowerPC/pcrel-tls-initial-exec.ll index 7789e23515ab4..f10ed538d092c 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-tls-initial-exec.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-tls-initial-exec.ll @@ -3,8 +3,9 @@ ; RUN: FileCheck %s --check-prefix=CHECK-S ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names --filetype=obj \ -; RUN: -enable-ppc-pcrel-tls < %s | llvm-objdump --mcpu=pwr10 -dr - | \ -; RUN: FileCheck %s --check-prefix=CHECK-O +; RUN: -enable-ppc-pcrel-tls -o %t.o < %s +; RUN: llvm-objdump --mcpu=pwr10 -dr %t.o | FileCheck %s --check-prefix=CHECK-O +; RUN: llvm-readelf -s %t.o | FileCheck %s --check-prefix=CHECK-SYM ; These test cases are to ensure that 
when using pc relative memory operations ; ABI correct code and relocations are produced for Initial Exec TLS Model. @@ -42,6 +43,9 @@ define i32 @InitialExecValueLoad() { ; CHECK-O-NEXT: 2e 68 63 7c lwzx 3, 3, 13 ; CHECK-O-NEXT: 0000000000000029: R_PPC64_TLS x ; CHECK-O-NEXT: 20 00 80 4e blr + +; CHECK-SYM-LABEL: Symbol table '.symtab' contains 6 entries +; CHECK-SYM: 5: 0000000000000000 0 TLS GLOBAL DEFAULT UND x entry: %0 = load i32, i32* @x, align 4 ret i32 %0 From 245f846c4eaf7142000f6548889a3aafb84db149 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 3 Sep 2020 12:01:51 +0100 Subject: [PATCH 096/465] [MemCpyOptimizer] Change required analysis order for BasicAA/PhiValuesAnalysis This is a followup to 1ccfb52a61748, which made a number of changes including the apparently innocuous reordering of required passes in MemCpyOptimizer. This however altered the creation order of BasicAA vs Phi Values analysis, meaning BasicAA did not pick up PhiValues as a cached result. Instead if we require MemoryDependence first it will require PhiValuesAnalysis allowing BasicAA to use it for better results. I don't claim this is an excellent design, but it fixes a nasty little regressions where a query later in JumpThreading was getting worse results. 
Differential Revision: https://reviews.llvm.org/D87027 --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 4 ++-- llvm/test/CodeGen/AMDGPU/opt-pipeline.ll | 6 +++--- llvm/test/Other/opt-O2-pipeline.ll | 2 +- llvm/test/Other/opt-O3-pipeline-enable-matrix.ll | 2 +- llvm/test/Other/opt-O3-pipeline.ll | 2 +- llvm/test/Other/opt-Os-pipeline.ll | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 1fd899aeebb36..d2a8f5e08d684 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -272,12 +272,12 @@ class MemCpyOptLegacyPass : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } }; diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll index ca125c7897793..31531a43fc3f2 100644 --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -164,8 +164,8 @@ ; GCN-O1-NEXT: Delete dead loops ; GCN-O1-NEXT: Unroll loops ; GCN-O1-NEXT: SROA -; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Phi Values Analysis +; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Memory Dependence Analysis ; GCN-O1-NEXT: MemCpy Optimization ; GCN-O1-NEXT: Sparse Conditional Constant Propagation @@ -493,9 +493,9 @@ ; GCN-O2-NEXT: Lazy Block Frequency Analysis ; GCN-O2-NEXT: Optimization Remark Emitter ; GCN-O2-NEXT: Global Value Numbering +; GCN-O2-NEXT: Phi Values Analysis ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results -; GCN-O2-NEXT: Phi Values Analysis ; GCN-O2-NEXT: Memory Dependence Analysis ; GCN-O2-NEXT: MemCpy Optimization ; GCN-O2-NEXT: Sparse Conditional Constant Propagation @@ 
-853,9 +853,9 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Global Value Numbering +; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results -; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Memory Dependence Analysis ; GCN-O3-NEXT: MemCpy Optimization ; GCN-O3-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 4b7e8eed1f256..e606e7cfac171 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -140,9 +140,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll index c3c4b9d6d80cd..aaee6f786bac9 100644 --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -145,9 +145,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index 26dfee08b2cef..b2d2f85ae21be 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ 
b/llvm/test/Other/opt-O3-pipeline.ll @@ -145,9 +145,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index 3e778ea7e9755..cc91707c4b009 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -126,9 +126,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation From 1d442415a30ef3be0e6ee189bce8795f203e712f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 12:01:46 +0100 Subject: [PATCH 097/465] [X86][AVX] Test SSE41 BLENDV combines on AVX targets as well Show up any differences due to SSE41 variant being locked to use xmm0 --- .../CodeGen/X86/combine-sse41-intrinsics.ll | 201 ++++++++++++------ 1 file changed, 131 insertions(+), 70 deletions(-) diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll index cc4dee33c619a..88421d8f3f173 100644 --- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll +++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s +; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX define <2 x double> @test_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) { @@ -27,28 +28,43 @@ define <8 x i16> @test_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) { } define <2 x double> @test2_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) { -; CHECK-LABEL: test2_x86_sse41_blend_pd: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test2_x86_sse41_blend_pd: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test2_x86_sse41_blend_pd: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 -1) ret <2 x double> %1 } define <4 x float> @test2_x86_sse41_blend_ps(<4 x float> %a0, <4 x float> %a1) { -; CHECK-LABEL: test2_x86_sse41_blend_ps: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test2_x86_sse41_blend_ps: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test2_x86_sse41_blend_ps: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 -1) ret <4 x float> %1 } define <8 x i16> @test2_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: test2_x86_sse41_pblend_w: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test2_x86_sse41_pblend_w: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test2_x86_sse41_pblend_w: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 -1) ret <8 x i16> %1 } @@ -78,13 +94,18 @@ define <8 x i16> 
@test3_x86_sse41_pblend_w(<8 x i16> %a0) { } define double @demandedelts_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { -; CHECK-LABEL: demandedelts_blendvpd: -; CHECK: # %bb.0: -; CHECK-NEXT: movapd %xmm0, %xmm3 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: blendvpd %xmm0, %xmm1, %xmm3 -; CHECK-NEXT: movapd %xmm3, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: demandedelts_blendvpd: +; SSE: # %bb.0: +; SSE-NEXT: movapd %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 +; SSE-NEXT: movapd %xmm3, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: demandedelts_blendvpd: +; AVX: # %bb.0: +; AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer %2 = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer %3 = shufflevector <2 x double> %a2, <2 x double> undef, <2 x i32> zeroinitializer @@ -94,13 +115,18 @@ define double @demandedelts_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x do } define float @demandedelts_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { -; CHECK-LABEL: demandedelts_blendvps: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm0, %xmm3 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: blendvps %xmm0, %xmm1, %xmm3 -; CHECK-NEXT: movaps %xmm3, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: demandedelts_blendvps: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 +; SSE-NEXT: movaps %xmm3, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: demandedelts_blendvps: +; AVX: # %bb.0: +; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer %2 = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer %3 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> zeroinitializer @@ -110,15 +136,22 @@ define float 
@demandedelts_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float } define <16 x i8> @demandedelts_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { -; CHECK-LABEL: demandedelts_pblendvb: -; CHECK: # %bb.0: -; CHECK-NEXT: movdqa %xmm0, %xmm3 -; CHECK-NEXT: movdqa %xmm2, %xmm0 -; CHECK-NEXT: pblendvb %xmm0, %xmm1, %xmm3 -; CHECK-NEXT: pxor %xmm0, %xmm0 -; CHECK-NEXT: pshufb %xmm0, %xmm3 -; CHECK-NEXT: movdqa %xmm3, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: demandedelts_pblendvb: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm3 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: pshufb %xmm0, %xmm3 +; SSE-NEXT: movdqa %xmm3, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: demandedelts_pblendvb: +; AVX: # %bb.0: +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer %2 = shufflevector <16 x i8> %a1, <16 x i8> undef, <16 x i32> zeroinitializer %3 = shufflevector <16 x i8> %a2, <16 x i8> undef, <16 x i32> zeroinitializer @@ -128,19 +161,32 @@ define <16 x i8> @demandedelts_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> } define <2 x i64> @demandedbits_blendvpd(i64 %a0, i64 %a2, <2 x double> %a3) { -; CHECK-LABEL: demandedbits_blendvpd: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: orq $1, %rax -; CHECK-NEXT: orq $4, %rdi -; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: movq %rdi, %xmm2 -; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero -; CHECK-NEXT: movq {{.*#+}} xmm2 = xmm2[0],zero -; CHECK-NEXT: blendvpd %xmm0, %xmm2, %xmm1 -; CHECK-NEXT: psrlq $11, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: demandedbits_blendvpd: +; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: orq $1, %rax +; SSE-NEXT: orq $4, %rdi +; SSE-NEXT: movq %rax, %xmm1 +; SSE-NEXT: movq %rdi, %xmm2 +; SSE-NEXT: 
movq {{.*#+}} xmm1 = xmm1[0],zero +; SSE-NEXT: movq {{.*#+}} xmm2 = xmm2[0],zero +; SSE-NEXT: blendvpd %xmm0, %xmm2, %xmm1 +; SSE-NEXT: psrlq $11, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: demandedbits_blendvpd: +; AVX: # %bb.0: +; AVX-NEXT: movq %rdi, %rax +; AVX-NEXT: orq $1, %rax +; AVX-NEXT: orq $4, %rdi +; AVX-NEXT: vmovq %rax, %xmm1 +; AVX-NEXT: vmovq %rdi, %xmm2 +; AVX-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero +; AVX-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero +; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; AVX-NEXT: vpsrlq $11, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = or i64 %a0, 1 %2 = or i64 %a0, 4 %3 = bitcast i64 %1 to double @@ -154,26 +200,36 @@ define <2 x i64> @demandedbits_blendvpd(i64 %a0, i64 %a2, <2 x double> %a3) { } define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { -; CHECK-LABEL: xor_pblendvb: -; CHECK: # %bb.0: -; CHECK-NEXT: movdqa %xmm0, %xmm3 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: pblendvb %xmm0, %xmm3, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: xor_pblendvb: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: pblendvb %xmm0, %xmm3, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: xor_pblendvb: +; AVX: # %bb.0: +; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %1 = xor <16 x i8> %a2, %2 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %1) ret <16 x i8> %2 } define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { -; CHECK-LABEL: xor_blendvps: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm0, %xmm3 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: blendvps %xmm0, %xmm3, %xmm1 -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: xor_blendvps: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: blendvps %xmm0, %xmm3, %xmm1 +; SSE-NEXT: 
movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: xor_blendvps: +; AVX: # %bb.0: +; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %1 = bitcast <4 x float> %a2 to <4 x i32> %2 = xor <4 x i32> %1, %3 = bitcast <4 x i32> %2 to <4 x float> @@ -182,13 +238,18 @@ define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> % } define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { -; CHECK-LABEL: xor_blendvpd: -; CHECK: # %bb.0: -; CHECK-NEXT: movapd %xmm0, %xmm3 -; CHECK-NEXT: movaps %xmm2, %xmm0 -; CHECK-NEXT: blendvpd %xmm0, %xmm3, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: xor_blendvpd: +; SSE: # %bb.0: +; SSE-NEXT: movapd %xmm0, %xmm3 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: blendvpd %xmm0, %xmm3, %xmm1 +; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: xor_blendvpd: +; AVX: # %bb.0: +; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %1 = bitcast <2 x double> %a2 to <4 x i32> %2 = xor <4 x i32> %1, %3 = bitcast <4 x i32> %2 to <2 x double> From ca860dc57732fd1cd745403461b9d0d80fe571c4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 12:05:35 +0100 Subject: [PATCH 098/465] [X86][SSE] Add PR47404 test case --- .../CodeGen/X86/combine-sse41-intrinsics.ll | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll index 88421d8f3f173..26fd3cf7ab29c 100644 --- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll +++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll @@ -257,6 +257,28 @@ define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ret <2 x double> %4 } +define <16 x i8> @PR47404(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { +; SSE-LABEL: PR47404: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm3 +; SSE-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE-NEXT: pcmpgtb %xmm0, %xmm2 
+; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pblendvb %xmm0, %xmm3, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: PR47404: +; AVX: # %bb.0: +; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 +; AVX-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2 +; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq + %4 = icmp sgt <16 x i8> %2, + %5 = select <16 x i1> %4, <16 x i8> %0, <16 x i8> %1 + ret <16 x i8> %5 +} + declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) From a344b382a0f64922c22a4ad048aca925a784942a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 3 Sep 2020 12:16:17 +0100 Subject: [PATCH 099/465] [GVN] Preserve MemorySSA if it is available. Preserve MemorySSA if it is available before running GVN. DSE with MemorySSA will run closely after GVN. If GVN and 2 other passes preserve MemorySSA, DSE can re-use MemorySSA used by LICM when doing LTO. Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D86534 --- llvm/include/llvm/Transforms/Scalar/GVN.h | 6 +- llvm/lib/Transforms/Scalar/GVN.cpp | 64 +++++++++++-- .../test/Transforms/GVN/preserve-memoryssa.ll | 95 +++++++++++++++++++ 3 files changed, 153 insertions(+), 12 deletions(-) create mode 100644 llvm/test/Transforms/GVN/preserve-memoryssa.ll diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index f2818c6b792ec..be3804f95c3e8 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -46,11 +46,12 @@ class FunctionPass; class IntrinsicInst; class LoadInst; class LoopInfo; +class MemorySSA; +class MemorySSAUpdater; class OptimizationRemarkEmitter; class PHINode; class TargetLibraryInfo; class Value; - /// A private "module" namespace for types and utilities used by GVN. 
These /// are implementation details and should not be used by clients. namespace gvn LLVM_LIBRARY_VISIBILITY { @@ -211,6 +212,7 @@ class GVN : public PassInfoMixin { OptimizationRemarkEmitter *ORE = nullptr; ImplicitControlFlowTracking *ICF = nullptr; LoopInfo *LI = nullptr; + MemorySSAUpdater *MSSAU = nullptr; ValueTable VN; @@ -246,7 +248,7 @@ class GVN : public PassInfoMixin { bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *ORE); + OptimizationRemarkEmitter *ORE, MemorySSA *MSSA = nullptr); /// Push a new Value to the LeaderTable onto the list for its value number. void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index ff7596b19cb2b..f8962c0852249 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -26,8 +26,8 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DomTreeUpdater.h" @@ -36,6 +36,8 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -653,14 +655,18 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { auto *MemDep = isMemDepEnabled() ? 
&AM.getResult(F) : nullptr; auto *LI = AM.getCachedResult(F); + auto *MSSA = AM.getCachedResult(F); auto &ORE = AM.getResult(F); - bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE); + bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, + MSSA ? &MSSA->getMSSA() : nullptr); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); PA.preserve(); PA.preserve(); + if (MSSA) + PA.preserve(); if (LI) PA.preserve(); return PA; @@ -1335,6 +1341,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, LI->getAlign(), LI->getOrdering(), LI->getSyncScopeID(), UnavailablePred->getTerminator()); NewLoad->setDebugLoc(LI->getDebugLoc()); + if (MSSAU) { + auto *MSSA = MSSAU->getMemorySSA(); + // Get the defining access of the original load or use the load if it is a + // MemoryDef (e.g. because it is volatile). The inserted loads are + // guaranteed to load from the same definition. + auto *LIAcc = MSSA->getMemoryAccess(LI); + auto *DefiningAcc = + isa(LIAcc) ? LIAcc : LIAcc->getDefiningAccess(); + auto *NewAccess = MSSAU->createMemoryAccessInBB( + NewLoad, DefiningAcc, NewLoad->getParent(), + MemorySSA::BeforeTerminator); + if (auto *NewDef = dyn_cast(NewAccess)) + MSSAU->insertDef(NewDef, /*RenameUses=*/true); + else + MSSAU->insertUse(cast(NewAccess), /*RenameUses=*/true); + } // Transfer the old load's AA tags to the new load. AAMDNodes Tags; @@ -1551,9 +1573,17 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { // Insert a new store to null instruction before the load to indicate that // this code is not reachable. FIXME: We could insert unreachable // instruction directly because we can modify the CFG. 
- new StoreInst(UndefValue::get(Int8Ty), - Constant::getNullValue(Int8Ty->getPointerTo()), - IntrinsicI); + auto *NewS = new StoreInst(UndefValue::get(Int8Ty), + Constant::getNullValue(Int8Ty->getPointerTo()), + IntrinsicI); + if (MSSAU) { + // This added store is to null, so it will never executed and we can + // just use the LiveOnEntry def as defining access. + auto *NewDef = MSSAU->createMemoryAccessInBB( + NewS, MSSAU->getMemorySSA()->getLiveOnEntryDef(), NewS->getParent(), + MemorySSA::BeforeTerminator); + MSSAU->insertDef(cast(NewDef), /*RenameUses=*/true); + } } if (isAssumeWithEmptyBundle(*IntrinsicI)) markInstructionForDeletion(IntrinsicI); @@ -1687,6 +1717,8 @@ bool GVN::processLoad(LoadInst *L) { // Replace the load! patchAndReplaceAllUsesWith(L, AvailableValue); markInstructionForDeletion(L); + if (MSSAU) + MSSAU->removeMemoryAccess(L); ++NumGVNLoad; reportLoadElim(L, AvailableValue, ORE); // Tell MDA to rexamine the reused pointer since we might have more @@ -2204,7 +2236,7 @@ bool GVN::processInstruction(Instruction *I) { bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *RunORE) { + OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { AC = &RunAC; DT = &RunDT; VN.setDomTree(DT); @@ -2217,6 +2249,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, VN.setMemDep(MD); ORE = RunORE; InvalidBlockRPONumbers = true; + MemorySSAUpdater Updater(MSSA); + MSSAU = MSSA ? 
&Updater : nullptr; bool Changed = false; bool ShouldContinue = true; @@ -2227,7 +2261,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { BasicBlock *BB = &*FI++; - bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, nullptr, MD); + bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD); if (removedBlock) ++NumGVNBlocks; @@ -2263,6 +2297,9 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // iteration. DeadBlocks.clear(); + if (MSSA && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + return Changed; } @@ -2303,6 +2340,8 @@ bool GVN::processBlock(BasicBlock *BB) { salvageKnowledge(I, AC); salvageDebugInfo(*I); if (MD) MD->removeInstruction(I); + if (MSSAU) + MSSAU->removeMemoryAccess(I); LLVM_DEBUG(verifyRemoved(I)); ICF->removeInstruction(I); I->eraseFromParent(); @@ -2533,6 +2572,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) { LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); + if (MSSAU) + MSSAU->removeMemoryAccess(CurInst); LLVM_DEBUG(verifyRemoved(CurInst)); // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes // some assertion failures. @@ -2577,7 +2618,7 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { // possible. 
BasicBlock *BB = SplitCriticalEdge( Pred, Succ, - CriticalEdgeSplittingOptions(DT, LI).unsetPreserveLoopSimplify()); + CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify()); if (MD) MD->invalidateCachedPredecessors(); InvalidBlockRPONumbers = true; @@ -2592,7 +2633,7 @@ bool GVN::splitCriticalEdges() { do { std::pair Edge = toSplit.pop_back_val(); SplitCriticalEdge(Edge.first, Edge.second, - CriticalEdgeSplittingOptions(DT, LI)); + CriticalEdgeSplittingOptions(DT, LI, MSSAU)); } while (!toSplit.empty()); if (MD) MD->invalidateCachedPredecessors(); InvalidBlockRPONumbers = true; @@ -2791,6 +2832,7 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { auto *LIWP = getAnalysisIfAvailable(); + auto *MSSAWP = getAnalysisIfAvailable(); return Impl.runImpl( F, getAnalysis().getAssumptionCache(F), getAnalysis().getDomTree(), @@ -2800,7 +2842,8 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { ? &getAnalysis().getMemDep() : nullptr, LIWP ? &LIWP->getLoopInfo() : nullptr, - &getAnalysis().getORE()); + &getAnalysis().getORE(), + MSSAWP ? 
&MSSAWP->getMSSA() : nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -2817,6 +2860,7 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { AU.addPreserved(); AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); } private: diff --git a/llvm/test/Transforms/GVN/preserve-memoryssa.ll b/llvm/test/Transforms/GVN/preserve-memoryssa.ll new file mode 100644 index 0000000000000..a815baaa3d008 --- /dev/null +++ b/llvm/test/Transforms/GVN/preserve-memoryssa.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -aa-pipeline=basic-aa -passes='require,gvn' -S -verify-memoryssa %s | FileCheck %s + +; REQUIRES: asserts + +declare void @use(i32) readnone + +define i32 @test(i32* %ptr.0, i32** %ptr.1, i1 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LV_0:%.*]] = load i32, i32* [[PTR_0:%.*]], align 8 +; CHECK-NEXT: call void @use(i32 [[LV_0]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN749:%.*]], label [[FOR_INC774:%.*]] +; CHECK: if.then749: +; CHECK-NEXT: [[LV_1:%.*]] = load i32*, i32** [[PTR_1:%.*]], align 8 +; CHECK-NEXT: store i32 10, i32* [[LV_1]], align 4 +; CHECK-NEXT: [[LV_2_PRE:%.*]] = load i32, i32* [[PTR_0]], align 8 +; CHECK-NEXT: br label [[FOR_INC774]] +; CHECK: for.inc774: +; CHECK-NEXT: [[LV_2:%.*]] = phi i32 [ [[LV_2_PRE]], [[IF_THEN749]] ], [ [[LV_0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @use(i32 [[LV_2]]) +; CHECK-NEXT: ret i32 1 +; +entry: + br label %for.end435 + +for.end435: + %lv.0 = load i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.0) + br label %if.end724 + +if.end724: + br i1 %c, label %if.then749, label %for.inc774 + +if.then749: + %lv.1 = load i32*, i32** %ptr.1, align 8 + %arrayidx772 = getelementptr inbounds i32, i32* %lv.1, i64 0 + store i32 10, i32* %arrayidx772, align 4 + br label %for.inc774 + +for.inc774: + br label %for.body830 + +for.body830: + %lv.2 = load i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.2) + br label 
%for.body.i22 + +for.body.i22: + ret i32 1 +} + +define i32 @test_volatile(i32* %ptr.0, i32** %ptr.1, i1 %c) { +; CHECK-LABEL: @test_volatile( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LV_0:%.*]] = load volatile i32, i32* [[PTR_0:%.*]], align 8 +; CHECK-NEXT: call void @use(i32 [[LV_0]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN749:%.*]], label [[FOR_INC774:%.*]] +; CHECK: if.then749: +; CHECK-NEXT: [[LV_1:%.*]] = load volatile i32*, i32** [[PTR_1:%.*]], align 8 +; CHECK-NEXT: store i32 10, i32* [[LV_1]], align 4 +; CHECK-NEXT: br label [[FOR_INC774]] +; CHECK: for.inc774: +; CHECK-NEXT: [[LV_2:%.*]] = load volatile i32, i32* [[PTR_0]], align 8 +; CHECK-NEXT: call void @use(i32 [[LV_2]]) +; CHECK-NEXT: ret i32 1 +; +entry: + br label %for.end435 + +for.end435: + %lv.0 = load volatile i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.0) + br label %if.end724 + +if.end724: + br i1 %c, label %if.then749, label %for.inc774 + +if.then749: + %lv.1 = load volatile i32*, i32** %ptr.1, align 8 + %arrayidx772 = getelementptr inbounds i32, i32* %lv.1, i64 0 + store i32 10, i32* %arrayidx772, align 4 + br label %for.inc774 + +for.inc774: + br label %for.body830 + +for.body830: + %lv.2 = load volatile i32, i32* %ptr.0, align 8 + call void @use(i32 %lv.2) + br label %for.body.i22 + +for.body.i22: + ret i32 1 +} From 159f9a69b4bb4ee36a69c0dcb276fb2b533cbda2 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Thu, 3 Sep 2020 18:18:40 +0700 Subject: [PATCH 100/465] [Test] Add test showing some simple cases that IndVarSimplify does not cover --- .../IndVarSimplify/monotonic_checks.ll | 169 ++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll diff --git a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll new file mode 100644 index 0000000000000..6a9713410200e --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll @@ -0,0 
+1,169 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -indvars -S < %s | FileCheck %s +; RUN: opt -passes=indvars -S < %s | FileCheck %s + +; Monotonic decrementing iv. we should be able to prove that %iv.next Date: Thu, 3 Sep 2020 20:01:23 +0800 Subject: [PATCH 101/465] [NFC][RISCV] Simplify pass arg of RISCVMergeBaseOffsetOpt Reviewed By: lenary, asb Differential Revision: https://reviews.llvm.org/D87069 --- llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index 4c9013aa1e234..6c78c47e8551d 100644 --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -64,7 +64,7 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass { } // end anonymous namespace char RISCVMergeBaseOffsetOpt::ID = 0; -INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset", +INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE, RISCV_MERGE_BASE_OFFSET_NAME, false, false) // Detect the pattern: From e56edb801bbd41925dbaca39162731613fb289f4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 12:55:14 +0100 Subject: [PATCH 102/465] [X86][SSE] Fold select(X > -1, A, B) -> select(0 > X, B, A) (PR47404) Help PBLENDVB peek through to the sign bit source of the selection mask by swapping the select condition and inputs. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +++++++++- llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll | 12 ++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2af3d743728ee..6560ab2bcccdd 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -40710,10 +40710,18 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return V; // select(~Cond, X, Y) -> select(Cond, Y, X) - if (CondVT.getScalarType() != MVT::i1) + if (CondVT.getScalarType() != MVT::i1) { if (SDValue CondNot = IsNOT(Cond, DAG)) return DAG.getNode(N->getOpcode(), DL, VT, DAG.getBitcast(CondVT, CondNot), RHS, LHS); + // pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the signbit. + if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse() && + ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) { + Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT, + DAG.getConstant(0, DL, CondVT), Cond.getOperand(0)); + return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS); + } + } // Try to optimize vXi1 selects if both operands are either all constants or // bitcasts from scalar integer type. 
In that case we can convert the operands diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll index 26fd3cf7ab29c..7039e33c00935 100644 --- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll +++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll @@ -261,18 +261,14 @@ define <16 x i8> @PR47404(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { ; SSE-LABEL: PR47404: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm3 -; SSE-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE-NEXT: pcmpgtb %xmm0, %xmm2 -; SSE-NEXT: movdqa %xmm2, %xmm0 -; SSE-NEXT: pblendvb %xmm0, %xmm3, %xmm1 -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 +; SSE-NEXT: movdqa %xmm3, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: PR47404: ; AVX: # %bb.0: -; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %4 = icmp sgt <16 x i8> %2, %5 = select <16 x i1> %4, <16 x i8> %0, <16 x i8> %1 From 9f18f3c858d5ff8154701a50c2be6f7b19667a1d Mon Sep 17 00:00:00 2001 From: David Spickett Date: Fri, 28 Aug 2020 12:30:56 +0100 Subject: [PATCH 103/465] [lldb] Improve test failure reporting for expect() This updates the errors reported by expect() to something like: ``` Ran command: "help" Got output: Debugger commands: <...> Expecting start string: "Debugger commands:" (was found) Expecting end string: "foo" (was not found) ``` (see added tests for more examples) This shows the user exactly what was run, what checks passed and which failed. Along with whether that check was supposed to pass. (including what regex patterns matched) These lines are also output to the test trace file, whether the test passes or not. Note that expect() will still fail at the first failed check, in line with previous behaviour. 
Also I have flipped the wording of the assert message functions (.*_MSG) to describe failures not successes. This makes more sense as they are only shown on assert failures. Reviewed By: labath Differential Revision: https://reviews.llvm.org/D86792 --- .../Python/lldbsuite/test/lldbtest.py | 100 ++++++++------- .../TestAssertMessages.py | 115 ++++++++++++++++++ 2 files changed, 174 insertions(+), 41 deletions(-) create mode 100644 lldb/test/API/assert_messages_test/TestAssertMessages.py diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index dacd5ed734b50..e1966692b03c7 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -179,12 +179,12 @@ def CMD_MSG(str): - '''A generic "Command '%s' returns successfully" message generator.''' - return "Command '%s' returns successfully" % str + '''A generic "Command '%s' did not return successfully" message generator.''' + return "Command '%s' did not return successfully" % str def COMPLETION_MSG(str_before, str_after, completions): - '''A generic message generator for the completion mechanism.''' + '''A generic assertion failed message generator for the completion mechanism.''' return ("'%s' successfully completes to '%s', but completions were:\n%s" % (str_before, str_after, "\n".join(completions))) @@ -198,8 +198,8 @@ def EXP_MSG(str, actual, exe): def SETTING_MSG(setting): - '''A generic "Value of setting '%s' is correct" message generator.''' - return "Value of setting '%s' is correct" % setting + '''A generic "Value of setting '%s' is not correct" message generator.''' + return "Value of setting '%s' is not correct" % setting def line_number(filename, string_to_match): @@ -2433,58 +2433,76 @@ def expect( with recording(self, trace) as sbuf: print("looking at:", output, file=sbuf) - # The heading says either "Expecting" or "Not expecting". 
- heading = "Expecting" if matching else "Not expecting" + expecting_str = "Expecting" if matching else "Not expecting" + def found_str(matched): + return "was found" if matched else "was not found" + + # To be used as assert fail message and/or trace content + log_lines = [ + "{}:".format("Ran command" if exe else "Checking string"), + "\"{}\"".format(str), + # Space out command and output + "", + ] + if exe: + # Newline before output to make large strings more readable + log_lines.append("Got output:\n{}".format(output)) - # Start from the startstr, if specified. - # If there's no startstr, set the initial state appropriately. - matched = output.startswith(startstr) if startstr else ( - True if matching else False) + # Assume that we start matched if we want a match + # Meaning if you have no conditions, matching or + # not matching will always pass + matched = matching + # We will stop checking on first failure if startstr: - with recording(self, trace) as sbuf: - print("%s start string: %s" % (heading, startstr), file=sbuf) - print("Matched" if matched else "Not matched", file=sbuf) + matched = output.startswith(startstr) + log_lines.append("{} start string: \"{}\" ({})".format( + expecting_str, startstr, found_str(matched))) - # Look for endstr, if specified. - keepgoing = matched if matching else not matched - if endstr: + if endstr and matched == matching: matched = output.endswith(endstr) - with recording(self, trace) as sbuf: - print("%s end string: %s" % (heading, endstr), file=sbuf) - print("Matched" if matched else "Not matched", file=sbuf) + log_lines.append("{} end string: \"{}\" ({})".format( + expecting_str, endstr, found_str(matched))) - # Look for sub strings, if specified. 
- keepgoing = matched if matching else not matched - if substrs and keepgoing: + if substrs and matched == matching: start = 0 for substr in substrs: index = output[start:].find(substr) start = start + index if ordered and matching else 0 matched = index != -1 - with recording(self, trace) as sbuf: - print("%s sub string: %s" % (heading, substr), file=sbuf) - print("Matched" if matched else "Not matched", file=sbuf) - keepgoing = matched if matching else not matched - if not keepgoing: + log_lines.append("{} sub string: \"{}\" ({})".format( + expecting_str, substr, found_str(matched))) + + if matched != matching: break - # Search for regular expression patterns, if specified. - keepgoing = matched if matching else not matched - if patterns and keepgoing: + if patterns and matched == matching: for pattern in patterns: - # Match Objects always have a boolean value of True. - matched = bool(re.search(pattern, output)) - with recording(self, trace) as sbuf: - print("%s pattern: %s" % (heading, pattern), file=sbuf) - print("Matched" if matched else "Not matched", file=sbuf) - keepgoing = matched if matching else not matched - if not keepgoing: + matched = re.search(pattern, output) + + pattern_line = "{} regex pattern: \"{}\" ({}".format( + expecting_str, pattern, found_str(matched)) + if matched: + pattern_line += ", matched \"{}\"".format( + matched.group(0)) + pattern_line += ")" + log_lines.append(pattern_line) + + # Convert to bool because match objects + # are True-ish but != True itself + matched = bool(matched) + if matched != matching: break - self.assertTrue(matched if matching else not matched, - msg + "\nCommand output:\n" + EXP_MSG(str, output, exe) - if msg else EXP_MSG(str, output, exe)) + # If a check failed, add any extra assert message + if msg is not None and matched != matching: + log_lines.append(msg) + + log_msg = "\n".join(log_lines) + with recording(self, trace) as sbuf: + print(log_msg, file=sbuf) + if matched != matching: + self.fail(log_msg) 
def expect_expr( self, diff --git a/lldb/test/API/assert_messages_test/TestAssertMessages.py b/lldb/test/API/assert_messages_test/TestAssertMessages.py new file mode 100644 index 0000000000000..6619a65ad69ea --- /dev/null +++ b/lldb/test/API/assert_messages_test/TestAssertMessages.py @@ -0,0 +1,115 @@ +""" +Test the format of API test suite assert failure messages +""" + + +import lldb +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.lldbtest import * +from textwrap import dedent + + +class AssertMessagesTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + NO_DEBUG_INFO_TESTCASE = True + + def assert_expect_fails_with(self, cmd, expect_args, expected_msg): + try: + # This expect should fail + self.expect(cmd, **expect_args) + except AssertionError as e: + # Then check message from previous expect + self.expect(str(e), exe=False, substrs=[dedent(expected_msg)]) + else: + self.fail("Initial expect should have raised AssertionError!") + + def test_expect(self): + """Test format of messages produced by expect(...)""" + + # When an expect passes the messages are sent to the trace + # file which we can't access here. So really, these only + # check what failures look like, but it *should* be the same + # content for the trace log too. + + # Will stop at startstr fail + self.assert_expect_fails_with("settings list prompt", + dict(startstr="dog", endstr="cat"), + """\ + Ran command: + "settings list prompt" + + Got output: + prompt -- The debugger command line prompt displayed for the user. + + Expecting start string: "dog" (was not found)""") + + # startstr passes, endstr fails + # We see both reported + self.assert_expect_fails_with("settings list prompt", + dict(startstr=" prompt -- ", endstr="foo"), + """\ + Ran command: + "settings list prompt" + + Got output: + prompt -- The debugger command line prompt displayed for the user. 
+ + Expecting start string: " prompt -- " (was found) + Expecting end string: "foo" (was not found)""") + + # Same thing for substrs, regex patterns ignored because of substr failure + # Any substr after the first missing is also ignored + self.assert_expect_fails_with("abcdefg", + dict(substrs=["abc", "ijk", "xyz"], + patterns=["foo", "bar"], exe=False), + """\ + Checking string: + "abcdefg" + + Expecting sub string: "abc" (was found) + Expecting sub string: "ijk" (was not found)""") + + # Regex patterns also stop at first failure, subsequent patterns ignored + # They are last in the chain so no other check gets skipped + # Including the rest of the conditions here to prove they are run and shown + self.assert_expect_fails_with("0123456789", + dict(startstr="012", endstr="789", substrs=["345", "678"], + patterns=["[0-9]+", "[a-f]+", "a|b|c"], exe=False), + """\ + Checking string: + "0123456789" + + Expecting start string: "012" (was found) + Expecting end string: "789" (was found) + Expecting sub string: "345" (was found) + Expecting sub string: "678" (was found) + Expecting regex pattern: "[0-9]+" (was found, matched "0123456789") + Expecting regex pattern: "[a-f]+" (was not found)""") + + # This time we don't want matches but we do get them + self.assert_expect_fails_with("the quick brown fox", + # Note that the second pattern *will* match + dict(patterns=["[0-9]+", "fox"], exe=False, matching=False, + startstr="cat", endstr="rabbit", substrs=["abc", "def"]), + """\ + Checking string: + "the quick brown fox" + + Not expecting start string: "cat" (was not found) + Not expecting end string: "rabbit" (was not found) + Not expecting sub string: "abc" (was not found) + Not expecting sub string: "def" (was not found) + Not expecting regex pattern: "[0-9]+" (was not found) + Not expecting regex pattern: "fox" (was found, matched "fox")""") + + # Extra assert messages are only printed when we get a failure + # So I can't test that from here, just how it looks when it's
printed + self.assert_expect_fails_with("mouse", + dict(startstr="cat", exe=False, msg="Reason for check goes here!"), + """\ + Checking string: + "mouse" + + Expecting start string: "cat" (was not found) + Reason for check goes here!""") From 6de51189b07bbae853cc7c053dfb5f383c308c76 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 3 Sep 2020 12:30:36 +0100 Subject: [PATCH 104/465] [PassManager] Move load/store motion pass after DSE in LTO pipeline. As far as I am aware, the placement of MergedLoadStoreMotion in the pipeline is not heavily tuned currently. It seems to not matter much if we do it after DSE in the LTO pipeline (no binary changes for -O3 -flto on MultiSource/SPEC2000/SPEC2006). Moving it after DSE however has a major benefit: MemorySSA is constructed by LICM and is consumed by DSE, so if MergedLoadStoreMotion happens after DSE, we do not need to preserve MemorySSA in it. If there are any concerns with this move, I can also update MergedLoadStoreMotion to preserve MemorySSA. This patch together with D86651 (preserve MemSSA in MemCpyOpt) and D86534 (preserve MemSSA in GVN) are the remaining patches to bring down compile-time for DSE + MemorySSA to the levels outlined in http://lists.llvm.org/pipermail/llvm-dev/2020-August/144417.html Once they land, we should be able to start with flipping the switch on enabling DSE + MemorySSA. Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D86967 --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index c045c277706b3..326d1ab28b60a 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -1011,13 +1011,13 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); - PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. PM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. PM.add(createMemCpyOptPass()); // Remove dead memcpys. // Nuke dead stores. PM.add(createDeadStoreEliminationPass()); + PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. // More loops are countable; try to optimize them. PM.add(createIndVarSimplifyPass()); From e1a3038689947f7b36dccdc7b8308881cd763a89 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 3 Sep 2020 09:04:49 -0400 Subject: [PATCH 105/465] [EarlyCSE] add tests for fma/fmuladd; NFC --- llvm/test/Transforms/EarlyCSE/commute.ll | 46 ++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/llvm/test/Transforms/EarlyCSE/commute.ll b/llvm/test/Transforms/EarlyCSE/commute.ll index abecc3903a6f9..57c5a853a12ff 100644 --- a/llvm/test/Transforms/EarlyCSE/commute.ll +++ b/llvm/test/Transforms/EarlyCSE/commute.ll @@ -1029,6 +1029,49 @@ define i16 @umul_fix_scale(i16 %a, i16 %b, i32 %s) { ret i16 %o } +; TODO: handle >2 args + +define float @fma(float %a, float %b, float %c) { +; CHECK-LABEL: @fma( +; CHECK-NEXT: [[X:%.*]] = call float @llvm.fma.f32(float [[A:%.*]], float [[B:%.*]], float [[C:%.*]]) +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.fma.f32(float [[B]], float [[A]], float [[C]]) +; CHECK-NEXT: [[R:%.*]] = fdiv nnan float [[X]], [[Y]] +; CHECK-NEXT: ret float [[R]] +; + %x = call float @llvm.fma.f32(float %a, float %b, float %c) + %y = call float @llvm.fma.f32(float %b, float %a, float %c) + %r = fdiv nnan float %x, %y + ret float %r +} + +define float @fma_different_add_ops(float %a, float %b, float %c, float %d) { +; CHECK-LABEL: @fma_different_add_ops( +; CHECK-NEXT: [[X:%.*]] = call float @llvm.fma.f32(float [[A:%.*]], float [[B:%.*]], float [[C:%.*]]) +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.fma.f32(float [[B]], float [[A]], 
float [[D:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fdiv nnan float [[X]], [[Y]] +; CHECK-NEXT: ret float [[R]] +; + %x = call float @llvm.fma.f32(float %a, float %b, float %c) + %y = call float @llvm.fma.f32(float %b, float %a, float %d) + %r = fdiv nnan float %x, %y + ret float %r +} + +; TODO: handle >2 args + +define <2 x double> @fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: @fmuladd( +; CHECK-NEXT: [[X:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]]) +; CHECK-NEXT: [[Y:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[B]], <2 x double> [[A]], <2 x double> [[C]]) +; CHECK-NEXT: [[R:%.*]] = fdiv nnan <2 x double> [[X]], [[Y]] +; CHECK-NEXT: ret <2 x double> [[R]] +; + %x = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) + %y = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %a, <2 x double> %c) + %r = fdiv nnan <2 x double> %x, %y + ret <2 x double> %r +} + declare float @llvm.maxnum.f32(float, float) declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) @@ -1051,3 +1094,6 @@ declare i16 @llvm.smul.fix.i16(i16, i16, i32) declare i16 @llvm.umul.fix.i16(i16, i16, i32) declare <3 x i16> @llvm.smul.fix.sat.v3i16(<3 x i16>, <3 x i16>, i32) declare <3 x i16> @llvm.umul.fix.sat.v3i16(<3 x i16>, <3 x i16>, i32) + +declare float @llvm.fma.f32(float, float, float) +declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) From 5b2926974460c2486df7160e7273782e003812f6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 13:46:51 +0100 Subject: [PATCH 106/465] [X86] Fix llvm-qualified-auto warning by using const auto*. NFC. 
--- llvm/lib/Target/X86/X86EvexToVex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToVex.cpp index 540ad98b6d54f..8155ce3d0bb66 100644 --- a/llvm/lib/Target/X86/X86EvexToVex.cpp +++ b/llvm/lib/Target/X86/X86EvexToVex.cpp @@ -250,7 +250,7 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const { (Desc.TSFlags & X86II::VEX_L) ? makeArrayRef(X86EvexToVex256CompressTable) : makeArrayRef(X86EvexToVex128CompressTable); - auto I = llvm::lower_bound(Table, MI.getOpcode()); + const auto *I = llvm::lower_bound(Table, MI.getOpcode()); if (I == Table.end() || I->EvexOpcode != MI.getOpcode()) return false; From 23d9f4b958b8bc1f8862cad027b1f7c747bb5375 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 13:52:40 +0100 Subject: [PATCH 107/465] [X86] Fix llvm-qualified-auto warning by using auto*. NFC. --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index ee61787170fc1..840f132ec6664 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3379,7 +3379,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { // Match the shift amount as: (bitwidth - y). It should go away, too. if (ShiftAmt.getOpcode() != ISD::SUB) return false; - auto V0 = dyn_cast(ShiftAmt.getOperand(0)); + auto *V0 = dyn_cast(ShiftAmt.getOperand(0)); if (!V0 || V0->getZExtValue() != Bitwidth) return false; NBits = ShiftAmt.getOperand(1); From 890707aa0169a259ab8572091120756aec2d0c06 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 14:05:43 +0100 Subject: [PATCH 108/465] [X86] Avoid llvm-qualified-auto warning by not using auto. NFC. Try to consistently use the actual type name in the file. 
--- llvm/lib/Target/X86/X86FixupBWInsts.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp index 78de041329e20..f8d822aebc5b6 100644 --- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp +++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp @@ -187,8 +187,7 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) { /// If so, return that super register in \p SuperDestReg. bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, Register &SuperDestReg) const { - auto *TRI = &TII->getRegisterInfo(); - + const X86RegisterInfo *TRI = &TII->getRegisterInfo(); Register OrigDestReg = OrigMI->getOperand(0).getReg(); SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32); @@ -320,7 +319,7 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const { // This is only correct if we access the same subregister index: otherwise, // we could try to replace "movb %ah, %al" with "movl %eax, %eax". 
- auto *TRI = &TII->getRegisterInfo(); + const X86RegisterInfo *TRI = &TII->getRegisterInfo(); if (TRI->getSubRegIndex(NewSrcReg, OldSrc.getReg()) != TRI->getSubRegIndex(NewDestReg, OldDest.getReg())) return nullptr; From 2860b2c14b42af1d6204af9a546edc7993680452 Mon Sep 17 00:00:00 2001 From: "Ling, Liyang" Date: Thu, 3 Sep 2020 09:24:43 -0400 Subject: [PATCH 109/465] [mlir] Add Acos, Asin, Atan, Sinh, Cosh, Pow to SPIRVGLSLOps Reviewed By: mravishankar, antiagainst Differential Revision: https://reviews.llvm.org/D86929 --- .../mlir/Dialect/SPIRV/SPIRVGLSLOps.td | 200 ++++++++++++++++++ .../Dialect/SPIRV/Serialization/glsl-ops.mlir | 12 ++ mlir/test/Dialect/SPIRV/glslops.mlir | 99 +++++++++ 3 files changed, 311 insertions(+) diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td index 6064cc3043596..70534111b97f9 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td @@ -265,6 +265,108 @@ def SPV_GLSLTanOp : SPV_GLSLUnaryArithmeticOp<"Tan", 15, SPV_Float16or32> { // ----- +def SPV_GLSLAsinOp : SPV_GLSLUnaryArithmeticOp<"Asin", 16, SPV_Float16or32> { + let summary = "Arc Sine of operand in radians"; + + let description = [{ + The standard trigonometric arc sine of x radians. + + Result is an angle, in radians, whose sine is x. The range of result values + is [-Ï€ / 2, Ï€ / 2]. Result is undefined if abs x > 1. + + The operand x must be a scalar or vector whose component type is 16-bit or + 32-bit floating-point. + + Result Type and the type of x must be the same type. Results are computed + per component. 
+ + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + asin-op ::= ssa-id `=` `spv.GLSL.Asin` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Asin %0 : f32 + %3 = spv.GLSL.Asin %1 : vector<3xf16> + ``` + }]; +} + +// ----- + +def SPV_GLSLAcosOp : SPV_GLSLUnaryArithmeticOp<"Acos", 17, SPV_Float16or32> { + let summary = "Arc Cosine of operand in radians"; + + let description = [{ + The standard trigonometric arc cosine of x radians. + + Result is an angle, in radians, whose cosine is x. The range of result + values is [0, Ï€]. Result is undefined if abs x > 1. + + The operand x must be a scalar or vector whose component type is 16-bit or + 32-bit floating-point. + + Result Type and the type of x must be the same type. Results are computed + per component. + + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + acos-op ::= ssa-id `=` `spv.GLSL.Acos` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Acos %0 : f32 + %3 = spv.GLSL.Acos %1 : vector<3xf16> + ``` + }]; +} + +// ----- + +def SPV_GLSLAtanOp : SPV_GLSLUnaryArithmeticOp<"Atan", 18, SPV_Float16or32> { + let summary = "Arc Tangent of operand in radians"; + + let description = [{ + The standard trigonometric arc tangent of x radians. + + Result is an angle, in radians, whose tangent is y_over_x. The range of + result values is [-Ï€ / 2, Ï€ / 2]. + + The operand x must be a scalar or vector whose component type is 16-bit or + 32-bit floating-point. + + Result Type and the type of x must be the same type. Results are computed + per component. 
+ + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + atan-op ::= ssa-id `=` `spv.GLSL.Atan` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Atan %0 : f32 + %3 = spv.GLSL.Atan %1 : vector<3xf16> + ``` + }]; +} + +// ----- + def SPV_GLSLExpOp : SPV_GLSLUnaryArithmeticOp<"Exp", 27, SPV_Float16or32> { let summary = "Exponentiation of Operand 1"; @@ -513,6 +615,40 @@ def SPV_GLSLSMinOp : SPV_GLSLBinaryArithmeticOp<"SMin", 39, SPV_Integer> { // ----- +def SPV_GLSLPowOp : SPV_GLSLBinaryArithmeticOp<"Pow", 26, SPV_Float16or32> { + let summary = "Return x raised to the y power of two operands"; + + let description = [{ + Result is x raised to the y power; x^y. + + Result is undefined if x = 0 and y ≤ 0. + + The operand x and y must be a scalar or vector whose component type is + 16-bit or 32-bit floating-point. + + Result Type and the type of all operands must be the same type. Results are + computed per component. + + + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + pow-op ::= ssa-id `=` `spv.GLSL.Pow` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Pow %0, %1 : f32 + %3 = spv.GLSL.Pow %0, %1 : vector<3xf16> + ``` + }]; +} + +// ----- + def SPV_GLSLFSignOp : SPV_GLSLUnaryArithmeticOp<"FSign", 6, SPV_Float> { let summary = "Returns the sign of the operand"; @@ -602,6 +738,70 @@ def SPV_GLSLSqrtOp : SPV_GLSLUnaryArithmeticOp<"Sqrt", 31, SPV_Float> { // ----- +def SPV_GLSLSinhOp : SPV_GLSLUnaryArithmeticOp<"Sinh", 19, SPV_Float16or32> { + let summary = "Hyperbolic sine of operand in radians"; + + let description = [{ + Hyperbolic sine of x radians. 
+ + The operand x must be a scalar or vector whose component type is 16-bit or + 32-bit floating-point. + + Result Type and the type of x must be the same type. Results are computed + per component. + + + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + sinh-op ::= ssa-id `=` `spv.GLSL.Sinh` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Sinh %0 : f32 + %3 = spv.GLSL.Sinh %1 : vector<3xf16> + ``` + }]; +} + +// ----- + +def SPV_GLSLCoshOp : SPV_GLSLUnaryArithmeticOp<"Cosh", 20, SPV_Float16or32> { + let summary = "Hyperbolic cosine of operand in radians"; + + let description = [{ + Hyperbolic cosine of x radians. + + The operand x must be a scalar or vector whose component type is 16-bit or + 32-bit floating-point. + + Result Type and the type of x must be the same type. Results are computed + per component. 
+ + + ``` + restricted-float-scalar-type ::= `f16` | `f32` + restricted-float-scalar-vector-type ::= + restricted-float-scalar-type | + `vector<` integer-literal `x` restricted-float-scalar-type `>` + cosh-op ::= ssa-id `=` `spv.GLSL.Cosh` ssa-use `:` + restricted-float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Cosh %0 : f32 + %3 = spv.GLSL.Cosh %1 : vector<3xf16> + ``` + }]; +} + +// ----- + def SPV_GLSLTanhOp : SPV_GLSLUnaryArithmeticOp<"Tanh", 21, SPV_Float16or32> { let summary = "Hyperbolic tangent of operand in radians"; diff --git a/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir b/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir index b04195387f12e..9909ef3698e10 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir @@ -14,6 +14,18 @@ spv.module Logical GLSL450 requires #spv.vce { %4 = spv.GLSL.Sin %arg0 : f32 // CHECK: {{%.*}} = spv.GLSL.Tan {{%.*}} : f32 %5 = spv.GLSL.Tan %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Acos {{%.*}} : f32 + %6 = spv.GLSL.Acos %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Asin {{%.*}} : f32 + %7 = spv.GLSL.Asin %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Atan {{%.*}} : f32 + %8 = spv.GLSL.Atan %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Sinh {{%.*}} : f32 + %9 = spv.GLSL.Sinh %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Cosh {{%.*}} : f32 + %10 = spv.GLSL.Cosh %arg0 : f32 + // CHECK: {{%.*}} = spv.GLSL.Pow {{%.*}} : f32 + %11 = spv.GLSL.Pow %arg0, %arg1 : f32 spv.Return } } diff --git a/mlir/test/Dialect/SPIRV/glslops.mlir b/mlir/test/Dialect/SPIRV/glslops.mlir index 1e7b18ef71ffb..a8df3710a1d82 100644 --- a/mlir/test/Dialect/SPIRV/glslops.mlir +++ b/mlir/test/Dialect/SPIRV/glslops.mlir @@ -155,3 +155,102 @@ func @tanvec(%arg0 : vector<3xf16>) -> () { %2 = spv.GLSL.Tan %arg0 : vector<3xf16> return } + +//===----------------------------------------------------------------------===// +// spv.GLSL.Acos 
+//===----------------------------------------------------------------------===// + +func @acos(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Acos {{%.*}} : f32 + %2 = spv.GLSL.Acos %arg0 : f32 + return +} + +func @acosvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Acos {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Acos %arg0 : vector<3xf16> + return +} + +//===----------------------------------------------------------------------===// +// spv.GLSL.Asin +//===----------------------------------------------------------------------===// + +func @asin(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Asin {{%.*}} : f32 + %2 = spv.GLSL.Asin %arg0 : f32 + return +} + +func @asinvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Asin {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Asin %arg0 : vector<3xf16> + return +} + +//===----------------------------------------------------------------------===// +// spv.GLSL.Atan +//===----------------------------------------------------------------------===// + +func @atan(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Atan {{%.*}} : f32 + %2 = spv.GLSL.Atan %arg0 : f32 + return +} + +func @atanvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Atan {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Atan %arg0 : vector<3xf16> + return +} + +//===----------------------------------------------------------------------===// +// spv.GLSL.Sinh +//===----------------------------------------------------------------------===// + +func @sinh(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Sinh {{%.*}} : f32 + %2 = spv.GLSL.Sinh %arg0 : f32 + return +} + +func @sinhvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Sinh {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Sinh %arg0 : vector<3xf16> + return +} + +//===----------------------------------------------------------------------===// +// spv.GLSL.Cosh +//===----------------------------------------------------------------------===// + +func @cosh(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Cosh {{%.*}} : f32 + %2 = 
spv.GLSL.Cosh %arg0 : f32 + return +} + +func @coshvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Cosh {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Cosh %arg0 : vector<3xf16> + return +} + +//===----------------------------------------------------------------------===// +// spv.GLSL.Pow +//===----------------------------------------------------------------------===// + +func @pow(%arg0 : f32, %arg1 : f32) -> () { + // CHECK: spv.GLSL.Pow {{%.*}}, {{%.*}} : f32 + %2 = spv.GLSL.Pow %arg0, %arg1 : f32 + return +} + +func @powvec(%arg0 : vector<3xf16>, %arg1 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Pow {{%.*}}, {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Pow %arg0, %arg1 : vector<3xf16> + return +} + +// ----- + From 91848b11b4cce3e0583ab0f1ab12333661f9c7ba Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 14:39:57 +0100 Subject: [PATCH 110/465] LowerEmuTLS.cpp - remove unused TargetLowering.h include. NFC. We only needed llvm/IR/Constants.h. --- llvm/lib/CodeGen/LowerEmuTLS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp index 36b863178b474..0afdee45cda76 100644 --- a/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" From 1e21ca4d25f9743a0efdb1775812da84f8f98fa2 Mon Sep 17 00:00:00 2001 From: Zhibin Li Date: Thu, 3 Sep 2020 09:29:07 -0400 Subject: [PATCH 111/465] [spirv] Add SPIR-V GLSL extended Round op Reviewed By: mravishankar, antiagainst Differential Revision: https://reviews.llvm.org/D86914 --- .../mlir/Dialect/SPIRV/SPIRVGLSLOps.td | 30 +++++++++++++++++++ .../Dialect/SPIRV/Serialization/glsl-ops.mlir | 2 ++ 
mlir/test/Dialect/SPIRV/glslops.mlir | 15 ++++++++++ 3 files changed, 47 insertions(+) diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td index 70534111b97f9..10cafd8251166 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td @@ -430,6 +430,36 @@ def SPV_GLSLFloorOp : SPV_GLSLUnaryArithmeticOp<"Floor", 8, SPV_Float> { // ----- +def SPV_GLSLRoundOp: SPV_GLSLUnaryArithmeticOp<"Round", 1, SPV_Float> { + let summary = "Rounds to the whole number"; + + let description = [{ + Result is the value equal to the nearest whole number. + + The operand x must be a scalar or vector whose component type is + floating-point. + + Result Type and the type of x must be the same type. Results are computed + per component. + + + ``` + float-scalar-vector-type ::= float-type | + `vector<` integer-literal `x` float-type `>` + floor-op ::= ssa-id `=` `spv.GLSL.Round` ssa-use `:` + float-scalar-vector-type + ``` + #### Example: + + ```mlir + %2 = spv.GLSL.Round %0 : f32 + %3 = spv.GLSL.Round %1 : vector<3xf16> + ``` + }]; +} + +// ----- + def SPV_GLSLInverseSqrtOp : SPV_GLSLUnaryArithmeticOp<"InverseSqrt", 32, SPV_Float> { let summary = "Reciprocal of sqrt(operand)"; diff --git a/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir b/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir index 9909ef3698e10..223b6301207da 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir @@ -26,6 +26,8 @@ spv.module Logical GLSL450 requires #spv.vce { %10 = spv.GLSL.Cosh %arg0 : f32 // CHECK: {{%.*}} = spv.GLSL.Pow {{%.*}} : f32 %11 = spv.GLSL.Pow %arg0, %arg1 : f32 + // CHECK: {{%.*}} = spv.GLSL.Round {{%.*}} : f32 + %12 = spv.GLSL.Round %arg0 : f32 spv.Return } } diff --git a/mlir/test/Dialect/SPIRV/glslops.mlir b/mlir/test/Dialect/SPIRV/glslops.mlir index a8df3710a1d82..3e699ed05958c 100644 --- 
a/mlir/test/Dialect/SPIRV/glslops.mlir +++ b/mlir/test/Dialect/SPIRV/glslops.mlir @@ -254,3 +254,18 @@ func @powvec(%arg0 : vector<3xf16>, %arg1 : vector<3xf16>) -> () { // ----- +//===----------------------------------------------------------------------===// +// spv.GLSL.Round +//===----------------------------------------------------------------------===// + +func @round(%arg0 : f32) -> () { + // CHECK: spv.GLSL.Round {{%.*}} : f32 + %2 = spv.GLSL.Round %arg0 : f32 + return +} + +func @roundvec(%arg0 : vector<3xf16>) -> () { + // CHECK: spv.GLSL.Round {{%.*}} : vector<3xf16> + %2 = spv.GLSL.Round %arg0 : vector<3xf16> + return +} From 7939b76e2a7b1fbc288f6d700bdbe53c581b58a6 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 3 Sep 2020 09:44:37 -0400 Subject: [PATCH 112/465] [mlir] Support default valued attribute in StructsGen Its handling is similar to optional attributes, except for the getter method. Reviewed By: rsuderman Differential Revision: https://reviews.llvm.org/D87055 --- mlir/tools/mlir-tblgen/StructsGen.cpp | 35 +++++++++++++++++----- mlir/unittests/TableGen/StructsGenTest.cpp | 12 +++++++- mlir/unittests/TableGen/structs.td | 4 ++- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/mlir/tools/mlir-tblgen/StructsGen.cpp b/mlir/tools/mlir-tblgen/StructsGen.cpp index cccacc0cad853..2606dfe3696bd 100644 --- a/mlir/tools/mlir-tblgen/StructsGen.cpp +++ b/mlir/tools/mlir-tblgen/StructsGen.cpp @@ -143,7 +143,7 @@ static void emitFactoryDef(llvm::StringRef structName, )"; for (auto field : fields) { - if (field.getType().isOptional()) + if (field.getType().isOptional() || field.getType().hasDefaultValue()) os << llvm::formatv(getFieldInfoOptional, field.getName()); else os << llvm::formatv(getFieldInfo, field.getName()); @@ -169,7 +169,7 @@ bool {0}::classof(::mlir::Attribute attr))"; auto derived = attr.dyn_cast<::mlir::DictionaryAttr>(); if (!derived) return false; - int empty_optionals = 0; + int num_absent_attrs = 0; )"; os << 
llvm::formatv(classofInfo, structName) << " {"; @@ -184,7 +184,7 @@ bool {0}::classof(::mlir::Attribute attr))"; const char *classofArgInfoOptional = R"( auto {0} = derived.get("{0}"); if (!{0}) - ++empty_optionals; + ++num_absent_attrs; else if (!({1})) return false; )"; @@ -193,14 +193,14 @@ bool {0}::classof(::mlir::Attribute attr))"; auto type = field.getType(); std::string condition = std::string(tgfmt(type.getConditionTemplate(), &fctx.withSelf(name))); - if (type.isOptional()) + if (type.isOptional() || type.hasDefaultValue()) os << llvm::formatv(classofArgInfoOptional, name, condition); else os << llvm::formatv(classofArgInfo, name, condition); } const char *classofEndInfo = R"( - return derived.size() + empty_optionals == {0}; + return derived.size() + num_absent_attrs == {0}; } )"; os << llvm::formatv(classofEndInfo, fields.size()); @@ -229,14 +229,35 @@ emitAccessorDef(llvm::StringRef structName, return {1}.cast<{0}>(); } )"; + const char *fieldInfoDefaultValued = R"( +{0} {2}::{1}() const { + auto derived = this->cast<::mlir::DictionaryAttr>(); + auto {1} = derived.get("{1}"); + if (!{1}) { + ::mlir::Builder builder(getContext()); + return {3}; + } + assert({1}.isa<{0}>() && "incorrect Attribute type found."); + return {1}.cast<{0}>(); +} +)"; + FmtContext fmtCtx; + fmtCtx.withBuilder("builder"); + for (auto field : fields) { auto name = field.getName(); auto type = field.getType(); auto storage = type.getStorageType(); - if (type.isOptional()) + if (type.isOptional()) { os << llvm::formatv(fieldInfoOptional, storage, name, structName); - else + } else if (type.hasDefaultValue()) { + std::string defaultValue = tgfmt(type.getConstBuilderTemplate(), &fmtCtx, + type.getDefaultValue()); + os << llvm::formatv(fieldInfoDefaultValued, storage, name, structName, + defaultValue); + } else { os << llvm::formatv(fieldInfo, storage, name, structName); + } } } diff --git a/mlir/unittests/TableGen/StructsGenTest.cpp b/mlir/unittests/TableGen/StructsGenTest.cpp index 
14b0abc675bff..d2acb28ebfb13 100644 --- a/mlir/unittests/TableGen/StructsGenTest.cpp +++ b/mlir/unittests/TableGen/StructsGenTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" #include "mlir/IR/Identifier.h" #include "mlir/IR/StandardTypes.h" #include "llvm/ADT/DenseMap.h" @@ -34,9 +35,10 @@ static test::TestStruct getTestStruct(mlir::MLIRContext *context) { auto elementsAttr = mlir::DenseIntElementsAttr::get(elementsType, {1, 2, 3, 4, 5, 6}); auto optionalAttr = nullptr; + auto defaultValuedAttr = nullptr; return test::TestStruct::get(integerAttr, floatAttr, elementsAttr, - optionalAttr, context); + optionalAttr, defaultValuedAttr, context); } /// Validates that test::TestStruct::classof correctly identifies a valid @@ -167,4 +169,12 @@ TEST(StructsGenTest, EmptyOptional) { EXPECT_EQ(structAttr.sample_optional_integer(), nullptr); } +TEST(StructsGenTest, GetDefaultValuedAttr) { + mlir::MLIRContext context; + mlir::Builder builder(&context); + auto structAttr = getTestStruct(&context); + EXPECT_EQ(structAttr.sample_default_valued_integer(), + builder.getI32IntegerAttr(42)); +} + } // namespace mlir diff --git a/mlir/unittests/TableGen/structs.td b/mlir/unittests/TableGen/structs.td index cf5e4f5448f00..06a15e181484f 100644 --- a/mlir/unittests/TableGen/structs.td +++ b/mlir/unittests/TableGen/structs.td @@ -17,6 +17,8 @@ def Test_Struct : StructAttr<"TestStruct", Test_Dialect, [ StructFieldAttr<"sample_float", F32Attr>, StructFieldAttr<"sample_elements", I32ElementsAttr>, StructFieldAttr<"sample_optional_integer", - OptionalAttr>] > { + OptionalAttr>, + StructFieldAttr<"sample_default_valued_integer", + DefaultValuedAttr>] > { let description = "Structure for test data"; } From dfb7b3fe02c0e84968960f4aef88361e6de10874 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 3 Sep 2020 13:49:04 +0200 Subject: [PATCH 113/465] [mlir][VectorOps] Fall 
back to a loop when accessing a vector from a strided memref The scalar loop is slow but correct. Differential Revision: https://reviews.llvm.org/D87082 --- .../Conversion/VectorToSCF/VectorToSCF.cpp | 20 +++++++++++++-- .../VectorToSCF/vector-to-loops.mlir | 25 +++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 267aea90cc9d7..3c501f046f074 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -547,7 +547,15 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( using namespace mlir::edsc::op; TransferReadOp transfer = cast(op); - if (transfer.permutation_map().isMinorIdentity()) { + + // Fall back to a loop if the fastest varying stride is not 1 or it is + // permuted. + int64_t offset; + SmallVector strides; + auto successStrides = + getStridesAndOffset(transfer.getMemRefType(), strides, offset); + if (succeeded(successStrides) && strides.back() == 1 && + transfer.permutation_map().isMinorIdentity()) { // If > 1D, emit a bunch of loops around 1-D vector transfers. if (transfer.getVectorType().getRank() > 1) return NDTransferOpHelper(rewriter, transfer, options) @@ -621,7 +629,15 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( using namespace edsc::op; TransferWriteOp transfer = cast(op); - if (transfer.permutation_map().isMinorIdentity()) { + + // Fall back to a loop if the fastest varying stride is not 1 or it is + // permuted. + int64_t offset; + SmallVector strides; + auto successStrides = + getStridesAndOffset(transfer.getMemRefType(), strides, offset); + if (succeeded(successStrides) && strides.back() == 1 && + transfer.permutation_map().isMinorIdentity()) { // If > 1D, emit a bunch of loops around 1-D vector transfers. 
if (transfer.getVectorType().getRank() > 1) return NDTransferOpHelper(rewriter, transfer, options) diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir index b19ea9dde7933..986bfe1763515 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -457,3 +457,28 @@ func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref (d0 + d1 * 8)>>) -> vector<4xf32> { + %c0 = constant 0 : index + %f0 = constant 0.0 : f32 + %0 = vector.transfer_read %A[%c0, %c0], %f0 + : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>, vector<4xf32> + return %0 : vector<4xf32> +} + +// CHECK-LABEL: transfer_read_strided( +// CHECK: scf.for +// CHECK: load + +func @transfer_write_strided(%A : vector<4xf32>, %B : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>) { + %c0 = constant 0 : index + vector.transfer_write %A, %B[%c0, %c0] : + vector<4xf32>, memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>> + return +} + +// CHECK-LABEL: transfer_write_strided( +// CHECK: scf.for +// CHECK: store From db367ff2d8d038f61d09521d5e6a293528df3d54 Mon Sep 17 00:00:00 2001 From: Victor Huang Date: Thu, 3 Sep 2020 09:08:24 -0500 Subject: [PATCH 114/465] [LLD][PowerPC][test] Fix out-of-memory issue running ppc64-pcrel-long-branch-error.s Following 97febb1, fix the out-of-memory error associated with buffering the output in-memory by writing to an allocated file with the minimum offset and running it on ppc system-linux only. 
Peer reviewed by: nemanjai --- lld/test/ELF/ppc64-pcrel-long-branch-error.s | 21 ++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/lld/test/ELF/ppc64-pcrel-long-branch-error.s b/lld/test/ELF/ppc64-pcrel-long-branch-error.s index b9904909742d0..2db9d15b70d07 100644 --- a/lld/test/ELF/ppc64-pcrel-long-branch-error.s +++ b/lld/test/ELF/ppc64-pcrel-long-branch-error.s @@ -1,22 +1,23 @@ -## The test as-is needs a large heap size. -## Disabled until we know how to check for that prerequisite. -# UNSUPPORTED: ppc - -# REQUIRES: ppc +# REQUIRES: ppc, system-linux # RUN: echo 'SECTIONS { \ # RUN: .text_low 0x2000: { *(.text_low) } \ -# RUN: .text_high 0x800002000 : { *(.text_high) } \ +# RUN: .text_high 0x200002010 : { *(.text_high) } \ # RUN: }' > %t.script +## In this test, we do not use -o /dev/null like other similar cases do since +## it will fail in some enviroments with out-of-memory errors associated with +## buffering the output in memeory. The test is enabled for ppc linux only since +## writing to an allocated file will cause time out error for this case on freebsd. 
+ # RUN: llvm-mc -filetype=obj -triple=ppc64le %s -o %t.o -# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -T %t.script %t.o -o %t 2>&1 | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=ppc64le -defsym HIDDEN=1 %s -o %t.o -# RUN: not ld.lld -shared -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -shared -T %t.script %t.o -o %t 2>&1 | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=ppc64 %s -o %t.o -# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -T %t.script %t.o -o %t 2>&1 | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=ppc64 -defsym HIDDEN=1 %s -o %t.o -# RUN: not ld.lld -shared -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -shared -T %t.script %t.o -o %t 2>&1 | FileCheck %s # CHECK: error: offset overflow 34 bits, please compile using the large code model From 4013cfd34df363dfd8d963375d72349c5da2623c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 3 Sep 2020 09:27:16 -0400 Subject: [PATCH 115/465] [GVN] add tests for >2 arg commutable intrinsics; NFC --- llvm/test/Transforms/GVN/commute.ll | 44 ++++++++++++++++++++++++ llvm/test/Transforms/NewGVN/commute.ll | 46 +++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/GVN/commute.ll b/llvm/test/Transforms/GVN/commute.ll index 72506c0ece283..d0f26f6e27e65 100644 --- a/llvm/test/Transforms/GVN/commute.ll +++ b/llvm/test/Transforms/GVN/commute.ll @@ -67,3 +67,47 @@ if.end: %umul = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y) ret { i32, i1 } %umul } + +declare i16 @llvm.smul.fix.i16(i16, i16, i32) +declare i16 @llvm.umul.fix.i16(i16, i16, i32) + +define i16 @intrinsic_3_args(i16 %x, i16 %y) { +; CHECK-LABEL: @intrinsic_3_args( +; CHECK-NEXT: [[M1:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i32 1) +; CHECK-NEXT: [[M2:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[Y]], i16 [[X]], i32 1) +; 
CHECK-NEXT: [[R:%.*]] = sub i16 [[M1]], [[M2]] +; CHECK-NEXT: ret i16 [[R]] +; + %m1 = call i16 @llvm.smul.fix.i16(i16 %x, i16 %y, i32 1) + %m2 = call i16 @llvm.smul.fix.i16(i16 %y, i16 %x, i32 1) + %r = sub i16 %m1, %m2 + ret i16 %r +} + +define i16 @intrinsic_3_args_not_same(i16 %x, i16 %y) { +; CHECK-LABEL: @intrinsic_3_args_not_same( +; CHECK-NEXT: [[M1:%.*]] = call i16 @llvm.umul.fix.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i32 2) +; CHECK-NEXT: [[M2:%.*]] = call i16 @llvm.umul.fix.i16(i16 [[Y]], i16 [[X]], i32 1) +; CHECK-NEXT: [[R:%.*]] = sub i16 [[M1]], [[M2]] +; CHECK-NEXT: ret i16 [[R]] +; + %m1 = call i16 @llvm.umul.fix.i16(i16 %x, i16 %y, i32 2) + %m2 = call i16 @llvm.umul.fix.i16(i16 %y, i16 %x, i32 1) + %r = sub i16 %m1, %m2 + ret i16 %r +} + +declare float @llvm.fma.f32(float, float, float) + +define float @fma(float %x, float %y) { +; CHECK-LABEL: @fma( +; CHECK-NEXT: [[M1:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00) +; CHECK-NEXT: [[M2:%.*]] = call float @llvm.fma.f32(float [[Y]], float [[X]], float 1.000000e+00) +; CHECK-NEXT: [[R:%.*]] = fdiv nnan float [[M1]], [[M2]] +; CHECK-NEXT: ret float [[R]] +; + %m1 = call float @llvm.fma.f32(float %x, float %y, float 1.0) + %m2 = call float @llvm.fma.f32(float %y, float %x, float 1.0) + %r = fdiv nnan float %m1, %m2 + ret float %r +} diff --git a/llvm/test/Transforms/NewGVN/commute.ll b/llvm/test/Transforms/NewGVN/commute.ll index d7737dd04f97a..f4b53621e4206 100644 --- a/llvm/test/Transforms/NewGVN/commute.ll +++ b/llvm/test/Transforms/NewGVN/commute.ll @@ -29,6 +29,8 @@ define void @cmp(i32 %x, i32 %y) { ret void } +declare i32 @llvm.smax.i32(i32, i32) + define void @intrinsic(i32 %x, i32 %y) { ; CHECK-LABEL: @intrinsic( ; CHECK-NEXT: [[M1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) @@ -42,4 +44,46 @@ define void @intrinsic(i32 %x, i32 %y) { ret void } -declare i32 @llvm.smax.i32(i32, i32) +declare i16 @llvm.smul.fix.i16(i16, i16, i32) +declare i16 
@llvm.umul.fix.i16(i16, i16, i32) + +define i16 @intrinsic_3_args(i16 %x, i16 %y) { +; CHECK-LABEL: @intrinsic_3_args( +; CHECK-NEXT: [[M1:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i32 1) +; CHECK-NEXT: [[M2:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[Y]], i16 [[X]], i32 1) +; CHECK-NEXT: [[R:%.*]] = sub i16 [[M1]], [[M2]] +; CHECK-NEXT: ret i16 [[R]] +; + %m1 = call i16 @llvm.smul.fix.i16(i16 %x, i16 %y, i32 1) + %m2 = call i16 @llvm.smul.fix.i16(i16 %y, i16 %x, i32 1) + %r = sub i16 %m1, %m2 + ret i16 %r +} + +define i16 @intrinsic_3_args_not_same(i16 %x, i16 %y) { +; CHECK-LABEL: @intrinsic_3_args_not_same( +; CHECK-NEXT: [[M1:%.*]] = call i16 @llvm.umul.fix.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i32 2) +; CHECK-NEXT: [[M2:%.*]] = call i16 @llvm.umul.fix.i16(i16 [[Y]], i16 [[X]], i32 1) +; CHECK-NEXT: [[R:%.*]] = sub i16 [[M1]], [[M2]] +; CHECK-NEXT: ret i16 [[R]] +; + %m1 = call i16 @llvm.umul.fix.i16(i16 %x, i16 %y, i32 2) + %m2 = call i16 @llvm.umul.fix.i16(i16 %y, i16 %x, i32 1) + %r = sub i16 %m1, %m2 + ret i16 %r +} + +declare float @llvm.fma.f32(float, float, float) + +define float @fma(float %x, float %y) { +; CHECK-LABEL: @fma( +; CHECK-NEXT: [[M1:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00) +; CHECK-NEXT: [[M2:%.*]] = call float @llvm.fma.f32(float [[Y]], float [[X]], float 1.000000e+00) +; CHECK-NEXT: [[R:%.*]] = fdiv nnan float [[M1]], [[M2]] +; CHECK-NEXT: ret float [[R]] +; + %m1 = call float @llvm.fma.f32(float %x, float %y, float 1.0) + %m2 = call float @llvm.fma.f32(float %y, float %x, float 1.0) + %r = fdiv nnan float %m1, %m2 + ret float %r +} From bdd5bfd0e434637c44916fe2072b1d80fa022092 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 3 Sep 2020 10:12:59 -0400 Subject: [PATCH 116/465] [IR][GVN] add/allow commutative intrinsics with >2 args Follow-up to D86798 and rGe25449f. 
--- llvm/include/llvm/IR/IntrinsicInst.h | 13 ++++++------- llvm/lib/Transforms/Scalar/GVN.cpp | 8 ++------ llvm/test/Transforms/GVN/commute.ll | 10 ++++------ 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index c29d20c1729ba..9ba9ea68f9898 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -70,13 +70,12 @@ class IntrinsicInst : public CallInst { case Intrinsic::uadd_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: - // TODO: These fixed-point math intrinsics have commutative first two - // operands, but callers may not handle instructions with more than - // two operands. - // case Intrinsic::smul_fix: - // case Intrinsic::umul_fix: - // case Intrinsic::smul_fix_sat: - // case Intrinsic::umul_fix_sat: + case Intrinsic::smul_fix: + case Intrinsic::umul_fix: + case Intrinsic::smul_fix_sat: + case Intrinsic::umul_fix_sat: + case Intrinsic::fma: + case Intrinsic::fmuladd: return true; default: return false; diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index f8962c0852249..c71038d66f995 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -295,9 +295,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { // of their operands get the same value number by sorting the operand value // numbers. Since commutative operands are the 1st two operands it is more // efficient to sort by hand rather than using, say, std::sort. 
- assert(((isa(I) && I->getNumOperands() == 2) || - (isa(I) && I->getNumOperands() == 3)) - && "Unsupported commutative instruction!"); + assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); e.commutative = true; @@ -1840,9 +1838,7 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, } if (Exp.commutative) { - assert((Exp.varargs.size() == 2 || - (Exp.opcode == Instruction::Call && Exp.varargs.size() == 3)) - && "Unsupported commutative instruction!"); + assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!"); if (Exp.varargs[0] > Exp.varargs[1]) { std::swap(Exp.varargs[0], Exp.varargs[1]); uint32_t Opcode = Exp.opcode >> 8; diff --git a/llvm/test/Transforms/GVN/commute.ll b/llvm/test/Transforms/GVN/commute.ll index d0f26f6e27e65..c76318db56a47 100644 --- a/llvm/test/Transforms/GVN/commute.ll +++ b/llvm/test/Transforms/GVN/commute.ll @@ -74,9 +74,7 @@ declare i16 @llvm.umul.fix.i16(i16, i16, i32) define i16 @intrinsic_3_args(i16 %x, i16 %y) { ; CHECK-LABEL: @intrinsic_3_args( ; CHECK-NEXT: [[M1:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i32 1) -; CHECK-NEXT: [[M2:%.*]] = call i16 @llvm.smul.fix.i16(i16 [[Y]], i16 [[X]], i32 1) -; CHECK-NEXT: [[R:%.*]] = sub i16 [[M1]], [[M2]] -; CHECK-NEXT: ret i16 [[R]] +; CHECK-NEXT: ret i16 0 ; %m1 = call i16 @llvm.smul.fix.i16(i16 %x, i16 %y, i32 1) %m2 = call i16 @llvm.smul.fix.i16(i16 %y, i16 %x, i32 1) @@ -84,6 +82,8 @@ define i16 @intrinsic_3_args(i16 %x, i16 %y) { ret i16 %r } +; Negative test - 3rd arg is different + define i16 @intrinsic_3_args_not_same(i16 %x, i16 %y) { ; CHECK-LABEL: @intrinsic_3_args_not_same( ; CHECK-NEXT: [[M1:%.*]] = call i16 @llvm.umul.fix.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i32 2) @@ -102,9 +102,7 @@ declare float @llvm.fma.f32(float, float, float) define float @fma(float %x, float %y) { ; CHECK-LABEL: @fma( ; CHECK-NEXT: [[M1:%.*]] = call float 
@llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00) -; CHECK-NEXT: [[M2:%.*]] = call float @llvm.fma.f32(float [[Y]], float [[X]], float 1.000000e+00) -; CHECK-NEXT: [[R:%.*]] = fdiv nnan float [[M1]], [[M2]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float 1.000000e+00 ; %m1 = call float @llvm.fma.f32(float %x, float %y, float 1.0) %m2 = call float @llvm.fma.f32(float %y, float %x, float 1.0) From 0563cd6739c9516f0fefd02b1cbf5e82ef11da4f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 15:11:56 +0100 Subject: [PATCH 117/465] Fix spelling mistake. NFC. --- llvm/lib/Target/X86/X86TargetObjectFile.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86TargetObjectFile.h b/llvm/lib/Target/X86/X86TargetObjectFile.h index acea772eb036d..f4bf52c83771f 100644 --- a/llvm/lib/Target/X86/X86TargetObjectFile.h +++ b/llvm/lib/Target/X86/X86TargetObjectFile.h @@ -36,7 +36,7 @@ namespace llvm { MCStreamer &Streamer) const override; }; - /// This implemenatation is used for X86 ELF targets that don't + /// This implementation is used for X86 ELF targets that don't /// have a further specialization. class X86ELFTargetObjectFile : public TargetLoweringObjectFileELF { public: From 58afaecdc23148219d18d566dbaf79f37950f716 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 15:17:30 +0100 Subject: [PATCH 118/465] X86/X86TargetObjectFile.cpp - remove unused headers. NFCI. 
--- llvm/lib/Target/X86/X86TargetObjectFile.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/llvm/lib/Target/X86/X86TargetObjectFile.cpp index 2b48baccc01fc..b88ad5a478f39 100644 --- a/llvm/lib/Target/X86/X86TargetObjectFile.cpp +++ b/llvm/lib/Target/X86/X86TargetObjectFile.cpp @@ -7,16 +7,8 @@ //===----------------------------------------------------------------------===// #include "X86TargetObjectFile.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/IR/Mangler.h" -#include "llvm/IR/Operator.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionCOFF.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Target/TargetMachine.h" From d4f3903131292d36b3bc22c28798b8e9dae20af6 Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Wed, 2 Sep 2020 14:10:22 +0000 Subject: [PATCH 119/465] [libTooling] Provide overloads of `rewriteDescendants` that operate directly on an AST node. The new overloads apply directly to a node, like the `clang::ast_matchers::match` functions, rather than generating an `EditGenerator` combinator. Differential Revision: https://reviews.llvm.org/D87031 --- .../clang/Tooling/Transformer/RewriteRule.h | 32 +++++++ clang/lib/Tooling/Transformer/RewriteRule.cpp | 51 ++++++++---- clang/unittests/Tooling/TransformerTest.cpp | 83 +++++++++++++++++++ 3 files changed, 152 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/Tooling/Transformer/RewriteRule.h b/clang/include/clang/Tooling/Transformer/RewriteRule.h index 9700d1ff539de..4bdcc8d5c3296 100644 --- a/clang/include/clang/Tooling/Transformer/RewriteRule.h +++ b/clang/include/clang/Tooling/Transformer/RewriteRule.h @@ -380,6 +380,38 @@ EditGenerator rewriteDescendants(std::string NodeId, RewriteRule Rule); // RewriteRule API. Recast them as such.
Or, just declare these functions // public and well-supported and move them out of `detail`. namespace detail { +/// The following overload set is a version of `rewriteDescendants` that +/// operates directly on the AST, rather than generating a Transformer +/// combinator. It applies `Rule` to all descendants of `Node`, although not +/// `Node` itself. `Rule` can refer to nodes bound in `Result`. +/// +/// For example, assuming that "body" is bound to a function body in MatchResult +/// `Results`, this will produce edits to change all appearances of `x` in that +/// body to `3`. +/// ``` +/// auto InlineX = +/// makeRule(declRefExpr(to(varDecl(hasName("x")))), changeTo(cat("3"))); +/// const auto *Node = Results.Nodes.getNodeAs("body"); +/// auto Edits = rewriteDescendants(*Node, InlineX, Results); +/// ``` +/// @{ +llvm::Expected> +rewriteDescendants(const Decl &Node, RewriteRule Rule, + const ast_matchers::MatchFinder::MatchResult &Result); + +llvm::Expected> +rewriteDescendants(const Stmt &Node, RewriteRule Rule, + const ast_matchers::MatchFinder::MatchResult &Result); + +llvm::Expected> +rewriteDescendants(const TypeLoc &Node, RewriteRule Rule, + const ast_matchers::MatchFinder::MatchResult &Result); + +llvm::Expected> +rewriteDescendants(const DynTypedNode &Node, RewriteRule Rule, + const ast_matchers::MatchFinder::MatchResult &Result); +/// @} + /// Builds a single matcher for the rule, covering all of the rule's cases. /// Only supports Rules whose cases' matchers share the same base "kind" /// (`Stmt`, `Decl`, etc.) 
Deprecated: use `buildMatchers` instead, which diff --git a/clang/lib/Tooling/Transformer/RewriteRule.cpp b/clang/lib/Tooling/Transformer/RewriteRule.cpp index 594e22f56b874..03921e0ea7de5 100644 --- a/clang/lib/Tooling/Transformer/RewriteRule.cpp +++ b/clang/lib/Tooling/Transformer/RewriteRule.cpp @@ -242,7 +242,7 @@ class ApplyRuleCallback : public MatchFinder::MatchCallback { } // namespace template -static llvm::Expected> +llvm::Expected> rewriteDescendantsImpl(const T &Node, RewriteRule Rule, const MatchResult &Result) { ApplyRuleCallback Callback(std::move(Rule)); @@ -252,10 +252,43 @@ rewriteDescendantsImpl(const T &Node, RewriteRule Rule, return std::move(Callback.Edits); } +llvm::Expected> +transformer::detail::rewriteDescendants(const Decl &Node, RewriteRule Rule, + const MatchResult &Result) { + return rewriteDescendantsImpl(Node, std::move(Rule), Result); +} + +llvm::Expected> +transformer::detail::rewriteDescendants(const Stmt &Node, RewriteRule Rule, + const MatchResult &Result) { + return rewriteDescendantsImpl(Node, std::move(Rule), Result); +} + +llvm::Expected> +transformer::detail::rewriteDescendants(const TypeLoc &Node, RewriteRule Rule, + const MatchResult &Result) { + return rewriteDescendantsImpl(Node, std::move(Rule), Result); +} + +llvm::Expected> +transformer::detail::rewriteDescendants(const DynTypedNode &DNode, + RewriteRule Rule, + const MatchResult &Result) { + if (const auto *Node = DNode.get()) + return rewriteDescendantsImpl(*Node, std::move(Rule), Result); + if (const auto *Node = DNode.get()) + return rewriteDescendantsImpl(*Node, std::move(Rule), Result); + if (const auto *Node = DNode.get()) + return rewriteDescendantsImpl(*Node, std::move(Rule), Result); + + return llvm::make_error( + llvm::errc::invalid_argument, + "type unsupported for recursive rewriting, Kind=" + + DNode.getNodeKind().asStringRef()); +} + EditGenerator transformer::rewriteDescendants(std::string NodeId, RewriteRule Rule) { - // FIXME: warn or return error 
if `Rule` contains any `AddedIncludes`, since - // these will be dropped. return [NodeId = std::move(NodeId), Rule = std::move(Rule)](const MatchResult &Result) -> llvm::Expected> { @@ -265,17 +298,7 @@ EditGenerator transformer::rewriteDescendants(std::string NodeId, if (It == NodesMap.end()) return llvm::make_error(llvm::errc::invalid_argument, "ID not bound: " + NodeId); - if (auto *Node = It->second.get()) - return rewriteDescendantsImpl(*Node, std::move(Rule), Result); - if (auto *Node = It->second.get()) - return rewriteDescendantsImpl(*Node, std::move(Rule), Result); - if (auto *Node = It->second.get()) - return rewriteDescendantsImpl(*Node, std::move(Rule), Result); - - return llvm::make_error( - llvm::errc::invalid_argument, - "type unsupported for recursive rewriting, ID=\"" + NodeId + - "\", Kind=" + It->second.getNodeKind().asStringRef()); + return detail::rewriteDescendants(It->second, std::move(Rule), Result); }; } diff --git a/clang/unittests/Tooling/TransformerTest.cpp b/clang/unittests/Tooling/TransformerTest.cpp index 2c9bd7dfd32de..a8d6d3dd851da 100644 --- a/clang/unittests/Tooling/TransformerTest.cpp +++ b/clang/unittests/Tooling/TransformerTest.cpp @@ -25,6 +25,7 @@ using ::testing::ElementsAre; using ::testing::IsEmpty; using transformer::cat; using transformer::changeTo; +using transformer::rewriteDescendants; using transformer::RewriteRule; constexpr char KHeaderContents[] = R"cc( @@ -568,6 +569,88 @@ TEST_F(TransformerTest, RewriteDescendantsInvalidNodeType) { EXPECT_EQ(ErrorCount, 1); } +// +// We include one test per typed overload. We don't test extensively since that +// is already covered by the tests above. +// + +TEST_F(TransformerTest, RewriteDescendantsTypedStmt) { + // Add an unrelated definition to the header that also has a variable named + // "x", to test that the rewrite is limited to the scope we intend. 
+ appendToHeader(R"cc(int g(int x) { return x; })cc"); + std::string Input = + "int f(int x) { int y = x; { int z = x * x; } return x; }"; + std::string Expected = + "int f(int x) { int y = 3; { int z = 3 * 3; } return 3; }"; + auto InlineX = + makeRule(declRefExpr(to(varDecl(hasName("x")))), changeTo(cat("3"))); + testRule(makeRule(functionDecl(hasName("f"), hasBody(stmt().bind("body"))), + [&InlineX](const MatchFinder::MatchResult &R) { + const auto *Node = R.Nodes.getNodeAs("body"); + assert(Node != nullptr && "body must be bound"); + return transformer::detail::rewriteDescendants( + *Node, InlineX, R); + }), + Input, Expected); +} + +TEST_F(TransformerTest, RewriteDescendantsTypedDecl) { + std::string Input = + "int f(int x) { int y = x; { int z = x * x; } return x; }"; + std::string Expected = + "int f(int x) { int y = 3; { int z = 3 * 3; } return 3; }"; + auto InlineX = + makeRule(declRefExpr(to(varDecl(hasName("x")))), changeTo(cat("3"))); + testRule(makeRule(functionDecl(hasName("f")).bind("fun"), + [&InlineX](const MatchFinder::MatchResult &R) { + const auto *Node = R.Nodes.getNodeAs("fun"); + assert(Node != nullptr && "fun must be bound"); + return transformer::detail::rewriteDescendants( + *Node, InlineX, R); + }), + Input, Expected); +} + +TEST_F(TransformerTest, RewriteDescendantsTypedTypeLoc) { + std::string Input = "int f(int *x) { return *x; }"; + std::string Expected = "int f(char *x) { return *x; }"; + auto IntToChar = + makeRule(typeLoc(loc(qualType(isInteger(), builtinType()))).bind("loc"), + changeTo(cat("char"))); + testRule( + makeRule( + functionDecl( + hasName("f"), + hasParameter(0, varDecl(hasTypeLoc(typeLoc().bind("parmType"))))), + [&IntToChar](const MatchFinder::MatchResult &R) { + const auto *Node = R.Nodes.getNodeAs("parmType"); + assert(Node != nullptr && "parmType must be bound"); + return transformer::detail::rewriteDescendants(*Node, IntToChar, R); + }), + Input, Expected); +} + +TEST_F(TransformerTest, 
RewriteDescendantsTypedDynTyped) { + // Add an unrelated definition to the header that also has a variable named + // "x", to test that the rewrite is limited to the scope we intend. + appendToHeader(R"cc(int g(int x) { return x; })cc"); + std::string Input = + "int f(int x) { int y = x; { int z = x * x; } return x; }"; + std::string Expected = + "int f(int x) { int y = 3; { int z = 3 * 3; } return 3; }"; + auto InlineX = + makeRule(declRefExpr(to(varDecl(hasName("x")))), changeTo(cat("3"))); + testRule( + makeRule(functionDecl(hasName("f"), hasBody(stmt().bind("body"))), + [&InlineX](const MatchFinder::MatchResult &R) { + auto It = R.Nodes.getMap().find("body"); + assert(It != R.Nodes.getMap().end() && "body must be bound"); + return transformer::detail::rewriteDescendants(It->second, + InlineX, R); + }), + Input, Expected); +} + TEST_F(TransformerTest, InsertBeforeEdit) { std::string Input = R"cc( int f() { From 898e42db93ae5ac87cc5ab4a93b14286b0b780ba Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 15:58:55 +0100 Subject: [PATCH 120/465] GlobalISel/Utils.h - remove unused includes. NFCI. 
Twine is unused, and TargetLowering can be reduced to a forward declaration and moved to Utils.cpp --- llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 5 ++--- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index b7d2489eda23c..50534860bec16 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -16,10 +16,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Register.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/MachineValueType.h" +#include namespace llvm { @@ -34,10 +33,10 @@ class MachineRegisterInfo; class MCInstrDesc; class RegisterBankInfo; class TargetInstrInfo; +class TargetLowering; class TargetPassConfig; class TargetRegisterInfo; class TargetRegisterClass; -class Twine; class ConstantFP; class APFloat; diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 303f2d8417b57..6f8d233043e70 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -13,7 +13,6 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" @@ -23,6 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" From 711b9806547b0392ff636499cebfb73f72d4c595 Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Thu, 3 Sep 2020 07:58:45 -0700 
Subject: [PATCH 121/465] [fuzzer] Create user provided fuzzer writeable directories when requested if they don't exist Currently, libFuzzer will exit with an error message if a non-existent directory is provided for any of the appropriate arguments. For cases where libFuzzer is used in a specialized embedded environment, it would be much easier to have libFuzzer create the directories for the user. This patch accommodates for this scenario by allowing the user to provide the argument `-create_missing_dirs=1` which makes libFuzzer attempt to create the `artifact_prefix`, `exact_artifact_path`, `features_dir` and/or corpus directory if they don't already exist rather than throw an error and exit. Split off from D84808 as requested [here](https://reviews.llvm.org/D84808#2208546). Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D86733 --- compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 30 ++++++++++++++----- compiler-rt/lib/fuzzer/FuzzerFlags.def | 4 +++ compiler-rt/lib/fuzzer/FuzzerIO.cpp | 32 ++++++++++++++++++++ compiler-rt/lib/fuzzer/FuzzerIO.h | 1 + compiler-rt/test/fuzzer/fuzzer-dirs.test | 38 ++++++++++++++++++++++++ 5 files changed, 98 insertions(+), 7 deletions(-) diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index 4669b12786fc2..2615014a02153 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -250,11 +250,26 @@ static void WorkerThread(const Command &BaseCmd, std::atomic *Counter, } } -static void ValidateDirectoryExists(const std::string &Path) { - if (!Path.empty() && !IsDirectory(Path)) { - Printf("ERROR: The required directory \"%s\" does not exist\n", Path.c_str()); +static void ValidateDirectoryExists(const std::string &Path, + bool CreateDirectory) { + if (Path.empty()) { + Printf("ERROR: Provided directory path is an empty string\n"); exit(1); } + + if (IsDirectory(Path)) + return; + + if (CreateDirectory) { + if (!MkDirRecursive(Path))
{ + Printf("ERROR: Failed to create directory \"%s\"\n", Path.c_str()); + exit(1); + } + return; + } + + Printf("ERROR: The required directory \"%s\" does not exist\n", Path.c_str()); + exit(1); } std::string CloneArgsWithoutX(const Vector &Args, @@ -691,7 +706,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { std::string OutputCorpusDir = (*Inputs)[0]; if (!IsFile(OutputCorpusDir)) { Options.OutputCorpus = OutputCorpusDir; - ValidateDirectoryExists(Options.OutputCorpus); + ValidateDirectoryExists(Options.OutputCorpus, Flags.create_missing_dirs); } } Options.ReportSlowUnits = Flags.report_slow_units; @@ -705,11 +720,12 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { if (!IsSeparator(ArtifactPathDir[ArtifactPathDir.length() - 1])) { ArtifactPathDir = DirName(ArtifactPathDir); } - ValidateDirectoryExists(ArtifactPathDir); + ValidateDirectoryExists(ArtifactPathDir, Flags.create_missing_dirs); } if (Flags.exact_artifact_path) { Options.ExactArtifactPath = Flags.exact_artifact_path; - ValidateDirectoryExists(DirName(Options.ExactArtifactPath)); + ValidateDirectoryExists(DirName(Options.ExactArtifactPath), + Flags.create_missing_dirs); } Vector Dictionary; if (Flags.dict) @@ -735,7 +751,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.DataFlowTrace = Flags.data_flow_trace; if (Flags.features_dir) { Options.FeaturesDir = Flags.features_dir; - ValidateDirectoryExists(Options.FeaturesDir); + ValidateDirectoryExists(Options.FeaturesDir, Flags.create_missing_dirs); } if (Flags.collect_data_flow) Options.CollectDataFlow = Flags.collect_data_flow; diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def index 832224a705d2b..8114791466032 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -167,3 +167,7 @@ FUZZER_DEPRECATED_FLAG(use_clang_coverage) FUZZER_FLAG_STRING(data_flow_trace, "Experimental: use the data flow 
trace") FUZZER_FLAG_STRING(collect_data_flow, "Experimental: collect the data flow trace") + +FUZZER_FLAG_INT(create_missing_dirs, 0, "Automatically attempt to create " + "directories for arguments that would normally expect them to already " + "exist (i.e. artifact_prefix, exact_artifact_path, features_dir, corpus)") diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.cpp b/compiler-rt/lib/fuzzer/FuzzerIO.cpp index cbb1dbe1b86d2..c3330c3425d09 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerIO.cpp @@ -144,6 +144,38 @@ void VPrintf(bool Verbose, const char *Fmt, ...) { fflush(OutputFile); } +static bool MkDirRecursiveInner(const std::string &Leaf) { + // Prevent chance of potential infinite recursion + if (Leaf == ".") + return true; + + const std::string &Dir = DirName(Leaf); + + if (IsDirectory(Dir)) { + MkDir(Leaf); + return IsDirectory(Leaf); + } + + bool ret = MkDirRecursiveInner(Dir); + if (!ret) { + // Give up early if a previous MkDir failed + return ret; + } + + MkDir(Leaf); + return IsDirectory(Leaf); +} + +bool MkDirRecursive(const std::string &Dir) { + if (Dir.empty()) + return false; + + if (IsDirectory(Dir)) + return true; + + return MkDirRecursiveInner(Dir); +} + void RmDirRecursive(const std::string &Dir) { IterateDirRecursive( Dir, [](const std::string &Path) {}, diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.h b/compiler-rt/lib/fuzzer/FuzzerIO.h index 8def2e96304e7..6e3a0b470c5f6 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.h +++ b/compiler-rt/lib/fuzzer/FuzzerIO.h @@ -64,6 +64,7 @@ size_t FileSize(const std::string &Path); void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, Vector *V, bool TopDir); +bool MkDirRecursive(const std::string &Dir); void RmDirRecursive(const std::string &Dir); // Iterate files and dirs inside Dir, recursively. 
diff --git a/compiler-rt/test/fuzzer/fuzzer-dirs.test b/compiler-rt/test/fuzzer/fuzzer-dirs.test index 2bf2a8b143300..c822c2f95c305 100644 --- a/compiler-rt/test/fuzzer/fuzzer-dirs.test +++ b/compiler-rt/test/fuzzer/fuzzer-dirs.test @@ -16,6 +16,7 @@ RUN: %run %t-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=LONG LONG: INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 8192 bytes RUN: rm -rf %t/SUB1 +# Verify error message prints to console when directory does not exist RUN: rm -rf %t.dir && mkdir -p %t.dir RUN: not %run %t-SimpleTest -artifact_prefix=%t.dir/NONEXISTENT_DIR/ 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR_RGX RUN: not %run %t-SimpleTest -artifact_prefix=%t.dir/NONEXISTENT_DIR/myprefix 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR_RGX @@ -23,3 +24,40 @@ RUN: not %run %t-SimpleTest -features_dir=%t.dir/NONEXISTENT_DIR/ 2>&1 | FileChe RUN: not %run %t-SimpleTest %t.dir/NONEXISTENT_DIR 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR_RGX RUN: not %run %t-SimpleTest -exact_artifact_path=%t.dir/NONEXISTENT_DIR/myprefix 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR_RGX NONEXISTENT_DIR_RGX: ERROR: The required directory "{{.*/NONEXISTENT_DIR/?}}" does not exist + +# Verify error message prints to console when given directory is an empty +# string +RUN: not %run %t-SimpleTest "" 2>&1 | FileCheck %s --check-prefix=INVALID_DIR_RGX +INVALID_DIR_RGX: ERROR: Provided directory path is an empty string + +# Verify error message prints to console when directory creation fails +# For platforms without functioning chmod (i.e. Windows), use a forbidden +# character in the directory name. +RUN: rm -rf %t.dir && mkdir -p %t.dir/access_restricted +RUN: chmod u-w %t.dir/access_restricted || true +RUN: not %run %t-SimpleTest -create_missing_dirs=1 %t.dir/access_restricted/?corpus? 
2>&1 | FileCheck %s --check-prefix=DIR_CREATION_FAILURE +DIR_CREATION_FAILURE: ERROR: Failed to create directory "{{.*/access_restricted/\?corpus\?}}" + +# Verify directories and sub-directories are created when -create_missing_dirs=1 +RUN: not %run %t-SimpleTest -create_missing_dirs=1 -artifact_prefix=%t.dir/subdira/./././artifacts/ -features_dir=%t.dir/subdirb/dummy_dir/././../subdirb/features/ %t.dir/subdirc/corpus +RUN: test -e %t.dir/subdira/artifacts/ +RUN: test -e %t.dir/subdirb/subdirb/features/ +RUN: test -e %t.dir/subdirc/corpus/ +RUN: test -e %t.dir/subdirb/dummy_dir + +# Verify directories and sub-directories are created for exact_artifact_path +# when -create_missing_dirs=1 +RUN: not %run %t-SimpleTest -create_missing_dirs=1 -exact_artifact_path=%t.dir/subdird/exact_artifacts/abc +RUN: test -e %t.dir/subdird/exact_artifacts/abc + +# Verify directories and sub-directories are created for artifact_prefix when +# it's referring to a file name prefix and -create_missing_dirs=1 +RUN: not %run %t-SimpleTest -create_missing_dirs=1 -artifact_prefix=%t.dir/subdire/myprefix +RUN: test -e %t.dir/subdire/ && not test -e %t.dir/subdire/myprefix + +# Verify directories are created when referring to relative paths and +# -create_missing_dirs=1 +RUN: cd %t.dir && not %run %t-SimpleTest -create_missing_dirs=1 -artifact_prefix=cwd_artifacts/ -features_dir=cwd_features/subdirtest/ ./cwd_corpus +RUN: test -e %t.dir/cwd_artifacts/ +RUN: test -e %t.dir/cwd_features/subdirtest/ +RUN: test -e %t.dir/cwd_corpus/ From baa74e013f7e30f09b002692913a705b704a58f5 Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Thu, 3 Sep 2020 08:44:27 -0700 Subject: [PATCH 122/465] Step down from security group Propose Ahmed as a replacement. He's fixed many security issues in LLVM for Apple in the last few years, as such he'll fit the "Individual contributors" description. 
Differential Revision: https://reviews.llvm.org/D86742 --- llvm/docs/Security.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst index 2992091d69699..f7daf30bddcfa 100644 --- a/llvm/docs/Security.rst +++ b/llvm/docs/Security.rst @@ -24,10 +24,10 @@ Security Group Members The members of the group represent a wide cross-section of the community, and meet the criteria for inclusion below. +* Ahmed Bougacha (Apple) * Akila Srinivasan (Apple) * Dimitry Andric (individual; FreeBSD) * Ed Maste (individual; FreeBSD) -* JF Bastien (Apple) * Josh Eads (Sony) * Kristof Beyls (ARM) * Matthew Riley (Google) From 7bc9924cb2fbd9f3ae53577607822ace267a04e6 Mon Sep 17 00:00:00 2001 From: Jamie Schmeiser Date: Thu, 3 Sep 2020 15:52:27 +0000 Subject: [PATCH 123/465] Add new hidden option -print-changed which only reports changes to IR A new hidden option -print-changed is added along with code to support printing the IR as it passes through the opt pipeline in the new pass manager. Only those passes that change the IR are reported, with others only having the banner reported, indicating that they did not change the IR, were filtered out or ignored. Filtering of output via the -filter-print-funcs is supported and a new supporting hidden option -filter-passes is added. The latter takes a comma separated list of pass names and filters the output to only show those passes in the list that change the IR. The output can also be modified via the -print-module-scope function. The code introduces a template base class that generalizes the comparison of IRs that takes an IR representation as template parameter. The constructor takes a series of lambdas that provide an event based API for generalized reporting of IRs as they are changed in the opt pipeline through the new pass manager. 
The first of several instantiations is provided that prints the IR in a form similar to that produced by -print-after-all with the above mentioned filtering capabilities. This version, and the others to follow will be introduced at the upcoming developer's conference. See https://hotcrp.llvm.org/usllvm2020/paper/29 for more information. Reviewed By: yrouban (Yevgeny Rouban) Differential Revision: https://reviews.llvm.org/D86360 --- .../llvm/Passes/StandardInstrumentations.h | 94 ++++++++ llvm/lib/IR/LegacyPassManager.cpp | 4 +- llvm/lib/Passes/StandardInstrumentations.cpp | 219 ++++++++++++++++++ llvm/test/Other/change-printer.ll | 109 +++++++++ 4 files changed, 424 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Other/change-printer.ll diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 795e2770bbe18..5a93c51ad18bc 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -25,6 +25,7 @@ namespace llvm { +class Function; class Module; /// Instrumentation to print IR before/after passes. @@ -73,6 +74,98 @@ class PrintPassInstrumentation { bool DebugLogging; }; +// Base class for classes that report changes to the IR. +// It presents an interface for such classes and provides callbacks +// on various events as the new pass manager transforms the IR. +// It also provides filtering of information based on hidden options +// specifying which functions are interesting. +// Callbacks are made for the following events/queries: +// 1. The initial IR processed. +// 2. To get the representation of the IR (of type \p T). +// 3. When a pass does not change the IR. +// 4. When a pass changes the IR (given both before and after representations +// of type \p T). +// 5. When an IR is invalidated. +// 6. When a pass is run on an IR that is not interesting (based on options). +// 7. 
When a pass is ignored (pass manager or adapter pass). +// 8. To compare two IR representations (of type \p T). +template class ChangePrinter { +protected: + ChangePrinter( + std::function HandleInitialIRFunc, + std::function + GenerateIRRepresentationFunc, + std::function OmitAfterFunc, + std::function + HandleAfterFunc, + std::function HandleInvalidatedFunc, + std::function + HandleFilteredFunc, + std::function + HandleIgnoredFunc, + std::function SameFunc) + : HandleInitialIR(HandleInitialIRFunc), + GenerateIRRepresentation(GenerateIRRepresentationFunc), + OmitAfter(OmitAfterFunc), HandleAfter(HandleAfterFunc), + HandleInvalidated(HandleInvalidatedFunc), + HandleFiltered(HandleFilteredFunc), HandleIgnored(HandleIgnoredFunc), + Same(SameFunc), InitialIR(true) {} + +public: + // Not virtual as classes are expected to be referenced as derived classes. + ~ChangePrinter() { + assert(BeforeStack.empty() && "Problem with Change Printer stack."); + } + + // Determine if this pass/IR is interesting and if so, save the IR + // otherwise it is left on the stack without data + void saveIRBeforePass(Any IR, StringRef PassID); + // Compare the IR from before the pass after the pass. + void handleIRAfterPass(Any IR, StringRef PassID); + // Handle the situation where a pass is invalidated. 
+ void handleInvalidatedPass(StringRef PassID); + +private: + // callback on the first IR processed + std::function HandleInitialIR; + // callback before and after a pass to get the representation of the IR + std::function + GenerateIRRepresentation; + // callback when the pass is not interesting + std::function OmitAfter; + // callback when interesting IR has changed + std::function + HandleAfter; + // callback when an interesting pass is invalidated + std::function HandleInvalidated; + // callback when the IR or pass is not interesting + std::function HandleFiltered; + // callback when an ignored pass is encountered + std::function HandleIgnored; + // callback to compare the before and after representations of the IR + std::function Same; + + // stack of IRs before passes + std::vector BeforeStack; + // Is this the first IR seen? + bool InitialIR; +}; + +// A change printer based on the string representation of the IR as created +// by unwrapAndPrint. The string representation is stored in a std::string +// to preserve it as the IR changes in each pass. Note that the banner is +// included in this representation but it is massaged before reporting. +class IRChangePrinter : public ChangePrinter {
+public: + IRChangePrinter(); + void registerCallbacks(PassInstrumentationCallbacks &PIC); + +protected: + raw_ostream &Out; +}; + /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any).
class StandardInstrumentations { @@ -80,6 +173,7 @@ class StandardInstrumentations { PrintPassInstrumentation PrintPass; TimePassesHandler TimePasses; OptNoneInstrumentation OptNone; + IRChangePrinter PrintChangedIR; public: StandardInstrumentations(bool DebugLogging) : PrintPass(DebugLogging) {} diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 8d9ed917bb617..63886f4861708 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -87,14 +87,14 @@ static cl::opt PrintAfterAll("print-after-all", static cl::opt PrintModuleScope("print-module-scope", cl::desc("When printing IR for print-[before|after]{-all} " - "always print a module IR"), + "and change reporters always print a module IR"), cl::init(false), cl::Hidden); static cl::list PrintFuncsList("filter-print-funcs", cl::value_desc("function names"), cl::desc("Only print IR for functions whose name " "match this for all print-[before|after][-all] " - "options"), + "and change reporter options"), cl::CommaSeparated, cl::Hidden); /// This is a helper to determine whether to print IR before or diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index da58fa57bdae7..d36bfd87f8a62 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" +#include #include using namespace llvm; @@ -43,6 +44,34 @@ static cl::opt cl::desc("Print all pass management debugging information. " "`-debug-pass-manager` must also be specified")); +// A hidden option that prints out the IR after passes, similar to +// -print-after-all except that it only prints the IR after passes that +// change the IR. Those passes that do not make changes to the IR are +// reported as not making any changes. In addition, the initial IR is +// also reported. 
Other hidden options affect the output from this +// option. -filter-passes will limit the output to the named passes +// that actually change the IR and other passes are reported as filtered out. +// The specified passes will either be reported as making no changes (with +// no IR reported) or the changed IR will be reported. Also, the +// -filter-print-funcs and -print-module-scope options will do similar +// filtering based on function name, reporting changed IRs as functions (or +// modules if -print-module-scope is specified) for a particular function +// or indicating that the IR has been filtered out. The extra options +// can be combined, allowing only changed IRs for certain passes on certain +// functions to be reported in different formats, with the rest being +// reported as filtered out. +static cl::opt PrintChanged("print-changed", + cl::desc("Print changed IRs"), + cl::init(false), cl::Hidden); +// A hidden option that supports the -print-changed option. See +// the description for -print-changed for an explanation of the use +// of this option. Note that this option has no effect without -print-changed. +static cl::list + PrintPassesList("filter-passes", cl::value_desc("pass names"), + cl::desc("Only consider IR changes for passes whose names " + "match for the print-changed option"), + cl::CommaSeparated, cl::Hidden); + namespace { /// Extracting Module out of \p IR unit. Also fills a textual description @@ -189,8 +218,197 @@ void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner, llvm_unreachable("Unknown wrapped IR type"); } +// Return true when this is a pass that is ignored (pass manager or adapter pass). +inline bool isIgnored(StringRef PassID) { + return PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"); +} + +// Return true when this is a defined function for which printing +// of changes is desired.
+inline bool isInterestingFunction(const Function &F) { + return llvm::isFunctionInPrintList(F.getName()); +} + +// Return true when this is a pass for which printing of changes is desired. +inline bool isInterestingPass(StringRef PassID) { + if (isIgnored(PassID)) + return false; + + static std::unordered_set PrintPassNames(PrintPassesList.begin(), + PrintPassesList.end()); + return PrintPassNames.empty() || PrintPassNames.count(PassID.str()); +} + +// Return true when this is a pass on IR for which printing +// of changes is desired. +bool isInteresting(Any IR, StringRef PassID) { + if (!isInterestingPass(PassID)) + return false; + if (any_isa(IR)) + return isInterestingFunction(*any_cast(IR)); + return true; +} + } // namespace +template +void ChangePrinter::saveIRBeforePass(Any IR, StringRef PassID) { + // Always need to place something on the stack because invalidated passes + // are not given the IR so it cannot be determined whether the pass was for + // something that was filtered out. + BeforeStack.emplace_back(); + + if (!isInteresting(IR, PassID)) + return; + // Is this the initial IR? + if (InitialIR) { + InitialIR = false; + HandleInitialIR(IR); + } + + // Save the IR representation on the stack. + auto &Data = BeforeStack.back(); + GenerateIRRepresentation(IR, PassID, Data); +} + +template +void ChangePrinter::handleIRAfterPass(Any IR, StringRef PassID) { + assert(!BeforeStack.empty() && "Unexpected empty stack encountered."); + std::string Name; + + // unwrapModule has inconsistent handling of names for function IRs. 
+ if (any_isa(IR)) { + const Function *F = any_cast(IR); + Name = formatv(" (function: {0})", F->getName()).str(); + } else { + if (auto UM = unwrapModule(IR)) + Name = UM->second; + } + if (Name == "") + Name = " (module)"; + + if (isIgnored(PassID)) + HandleIgnored(PassID, Name); + else if (!isInteresting(IR, PassID)) + HandleFiltered(PassID, Name); + else { + // Get the before rep from the stack + T &Before = BeforeStack.back(); + // Create the after rep + T After; + GenerateIRRepresentation(IR, PassID, After); + + // was there a change in IR? + if (Same(Before, After)) + OmitAfter(PassID, Name); + else + HandleAfter(PassID, Name, Before, After, IR); + } + BeforeStack.pop_back(); +} + +template +void ChangePrinter::handleInvalidatedPass(StringRef PassID) { + assert(!BeforeStack.empty() && "Unexpected empty stack encountered."); + + // Always flag it as invalidated as we cannot determine when + // a pass for a filtered function is invalidated since we do not + // get the IR in the callback. Also, the output is just alternate + // forms of the banner anyway. 
+ HandleInvalidated(PassID); + BeforeStack.pop_back(); +} + +void handleInitialIR(Any IR, raw_ostream &Out) { + StringRef Banner("*** IR Dump At Start: ***"); + unwrapAndPrint(Out, IR, Banner, true); +} + +void generateOutput(Any IR, StringRef PassID, std::string &Output) { + raw_string_ostream OS(Output); + // use the after banner for all cases so it will match + SmallString<20> Banner = formatv("*** IR Dump After {0} ***", PassID); + unwrapAndPrint(OS, IR, Banner, llvm::forcePrintModuleIR()); + OS.str(); +} + +void omitAfter(StringRef PassID, std::string &Name, raw_ostream &Out) { + Out << formatv("*** IR Dump After {0}{1} omitted because no change ***\n", + PassID, Name); +} + +void handleAfter(const StringRef After, std::string &Name, raw_ostream &Out) { + assert(After.find("*** IR Dump") == 0 && "Unexpected banner format."); + StringRef Banner = After.take_until([](char C) -> bool { return C == '\n'; }); + Out << Banner; + + // LazyCallGraph::SCC already has "(scc:..." in banner so only add + // in the name if it isn't already there. 
+ if (Name.substr(0, 6).compare(" (scc:") != 0 && !llvm::forcePrintModuleIR()) + Out << Name; + + Out << After.substr(Banner.size()); +} + +void handleInvalidated(StringRef PassID, raw_ostream &Out) { + Out << formatv("*** IR Pass {0} invalidated ***\n", PassID); +} + +void handleFiltered(StringRef PassID, std::string &Name, raw_ostream &Out) { + SmallString<20> Banner = + formatv("*** IR Dump After {0}{1} filtered out ***\n", PassID, Name); + Out << Banner; +} + +void handleIgnored(StringRef PassID, std::string &Name, raw_ostream &Out) { + Out << formatv("*** IR Pass {0}{1} ignored ***\n", PassID, Name); +} + +bool sameIR(const std::string &S1, const std::string &S2) { + return S1.compare(S2) == 0; +} + +IRChangePrinter::IRChangePrinter() + : ChangePrinter( + [this](Any IR) -> void { ::handleInitialIR(IR, Out); }, + ::generateOutput, + [this](StringRef PassID, std::string &Name) -> void { + ::omitAfter(PassID, Name, Out); + }, + [this](StringRef PassID, std::string &Name, const std::string &Before, + const std::string &After, + Any IR) -> void { ::handleAfter(After, Name, Out); }, + [this](StringRef PassID) -> void { + ::handleInvalidated(PassID, Out); + }, + [this](StringRef PassID, std::string &Name) -> void { + ::handleFiltered(PassID, Name, Out); + }, + [this](StringRef PassID, std::string &Name) -> void { + ::handleIgnored(PassID, Name, Out); + }, + ::sameIR), + Out(dbgs()) {} + +void IRChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) { + if (!PrintChanged) + return; + + PIC.registerBeforePassCallback([this](StringRef P, Any IR) { + saveIRBeforePass(IR, P); + return true; + }); + + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &) { + handleIRAfterPass(IR, P); + }); + PIC.registerAfterPassInvalidatedCallback( + [this](StringRef P, const PreservedAnalyses &) { + handleInvalidatedPass(P); + }); +} + PrintIRInstrumentation::~PrintIRInstrumentation() { assert(ModuleDescStack.empty() && "ModuleDescStack is 
not empty at exit"); } @@ -344,4 +562,5 @@ void StandardInstrumentations::registerCallbacks( PrintPass.registerCallbacks(PIC); TimePasses.registerCallbacks(PIC); OptNone.registerCallbacks(PIC); + PrintChangedIR.registerCallbacks(PIC); } diff --git a/llvm/test/Other/change-printer.ll b/llvm/test/Other/change-printer.ll new file mode 100644 index 0000000000000..51354fcc0341e --- /dev/null +++ b/llvm/test/Other/change-printer.ll @@ -0,0 +1,109 @@ +; Simple checks of -print-changed functionality +; +; Note that (mostly) only the banners are checked. +; +; Simple functionality check. +; RUN: opt -S -print-changed -passes=instsimplify 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK0 +; +; Check that only the passes that change the IR are printed and that the +; others (including g) are filtered out. +; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK1 +; +; Check that the reporting of IRs respects -print-module-scope +; RUN: opt -S -print-changed -passes=instsimplify -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK2 +; +; Check that the reporting of IRs respects -print-module-scope +; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs=f -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK3 +; +; Check that reporting of multiple functions happens +; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs="f,g" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK4 +; +; Check that the reporting of IRs respects -filter-passes +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK5 +; +; Check that the reporting of IRs respects -filter-passes with multiple passes +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" 2>&1 -o 
/dev/null < %s | FileCheck %s --check-prefix=CHECK6 +; +; Check that the reporting of IRs respects both -filter-passes and -filter-print-funcs +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK7 +; +; Check that the reporting of IRs respects -filter-passes, -filter-print-funcs and -print-module-scope +; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK8 +; +; Check that repeated passes that change the IR are printed and that the +; others (including g) are filtered out. Note that the second time +; instsimplify is run on f, it does not change the IR +; RUN: opt -S -print-changed -passes="instsimplify,instsimplify" -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK9 + +define i32 @g() { +entry: + %a = add i32 2, 3 + ret i32 %a +} + +define i32 @f() { +entry: + %a = add i32 2, 3 + ret i32 %a +} + +; CHECK0: *** IR Dump At Start: *** +; CHECK0: ; ModuleID = '' +; CHECK0: *** IR Dump After VerifierPass (module) omitted because no change *** +; CHECK0: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK0: *** IR Pass PassManager (function: g) ignored *** +; CHECK0: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK0: *** IR Pass PassManager (function: f) ignored *** +; CHECK0: *** IR Pass ModuleToFunctionPassAdaptor > (module) ignored *** +; CHECK0: *** IR Dump After VerifierPass (module) omitted because no change *** +; CHECK0: *** IR Dump After PrintModulePass (module) omitted because no change *** + +; CHECK1: *** IR Dump At Start: *** +; CHECK1: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK1: *** IR Dump After InstSimplifyPass *** (function: f) + +; CHECK2: *** IR Dump At 
Start: *** +; CHECK2: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK2: ModuleID = '' +; CHECK2: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK2: ModuleID = '' + +; CHECK3: *** IR Dump At Start: *** +; CHECK3: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK3: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK3: ModuleID = '' + +; CHECK4: *** IR Dump At Start: *** +; CHECK4: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK4: *** IR Dump After InstSimplifyPass *** (function: f) + +; CHECK5: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK5: *** IR Dump At Start: *** (function: g) +; CHECK5: *** IR Dump After NoOpFunctionPass (function: g) omitted because no change *** +; CHECK5: *** IR Dump After InstSimplifyPass (function: f) filtered out *** +; CHECK5: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** + +; CHECK6: *** IR Dump At Start: *** (function: g) +; CHECK6: *** IR Dump After InstSimplifyPass *** (function: g) +; CHECK6: *** IR Dump After NoOpFunctionPass (function: g) omitted because no change *** +; CHECK6: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK6: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** + +; CHECK7: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK7: *** IR Dump After NoOpFunctionPass (function: g) filtered out *** +; CHECK7: *** IR Dump At Start: *** (function: f) +; CHECK7: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK7: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** + +; CHECK8: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK8: *** IR Dump After NoOpFunctionPass (function: g) filtered out *** +; CHECK8: *** IR Dump At Start: *** (function: f) +; CHECK8: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK8: ModuleID = '' +; CHECK8: *** IR Dump After 
NoOpFunctionPass (function: f) omitted because no change *** + +; CHECK9: *** IR Dump At Start: *** +; CHECK9: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK9: *** IR Dump After InstSimplifyPass (function: g) filtered out *** +; CHECK9: *** IR Dump After InstSimplifyPass *** (function: f) +; CHECK9: *** IR Dump After InstSimplifyPass (function: f) omitted because no change *** From 62673c430de43837b0f177089ed184a0ffcd5678 Mon Sep 17 00:00:00 2001 From: Dokyung Song Date: Fri, 31 Jul 2020 00:07:20 +0000 Subject: [PATCH 124/465] [libFuzzer] Add an option to keep initial seed inputs around. This patch adds an option "keep_seed" to keep all initial seed inputs in the corpus. Previously, only the initial seed inputs that find new coverage were added to the corpus, and all the other initial inputs were discarded. We observed in some circumstances that useful initial seed inputs are discarded as they find no new coverage, even though they contain useful fragments in them (e.g., SQLITE3 FuzzBench benchmark). This newly added option provides a way to keeping seed inputs in the corpus for those circumstances. With this patch, and with -keep_seed=1, all initial seed inputs are kept in the corpus regardless of whether they find new coverage or not. Further, these seed inputs are not replaced with smaller inputs even if -reduce_inputs=1. 
Differential Revision: https://reviews.llvm.org/D86577 --- compiler-rt/lib/fuzzer/FuzzerCorpus.h | 4 +- compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 1 + compiler-rt/lib/fuzzer/FuzzerFlags.def | 4 ++ compiler-rt/lib/fuzzer/FuzzerFork.cpp | 14 ++++--- compiler-rt/lib/fuzzer/FuzzerInternal.h | 3 +- compiler-rt/lib/fuzzer/FuzzerLoop.cpp | 20 ++++++---- compiler-rt/lib/fuzzer/FuzzerOptions.h | 1 + .../lib/fuzzer/tests/FuzzerUnittest.cpp | 6 ++- compiler-rt/test/fuzzer/KeepSeedTest.cpp | 37 +++++++++++++++++++ compiler-rt/test/fuzzer/keep-seed.test | 17 +++++++++ 10 files changed, 90 insertions(+), 17 deletions(-) create mode 100644 compiler-rt/test/fuzzer/KeepSeedTest.cpp create mode 100644 compiler-rt/test/fuzzer/keep-seed.test diff --git a/compiler-rt/lib/fuzzer/FuzzerCorpus.h b/compiler-rt/lib/fuzzer/FuzzerCorpus.h index 54d1e09ec6df0..fd8ff6ca427dd 100644 --- a/compiler-rt/lib/fuzzer/FuzzerCorpus.h +++ b/compiler-rt/lib/fuzzer/FuzzerCorpus.h @@ -33,6 +33,7 @@ struct InputInfo { // Stats. size_t NumExecutedMutations = 0; size_t NumSuccessfullMutations = 0; + bool NeverReduce = false; bool MayDeleteFile = false; bool Reduced = false; bool HasFocusFunction = false; @@ -177,7 +178,7 @@ class InputCorpus { bool empty() const { return Inputs.empty(); } const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, - bool HasFocusFunction, + bool HasFocusFunction, bool NeverReduce, const Vector &FeatureSet, const DataFlowTrace &DFT, const InputInfo *BaseII) { assert(!U.empty()); @@ -187,6 +188,7 @@ class InputCorpus { InputInfo &II = *Inputs.back(); II.U = U; II.NumFeatures = NumFeatures; + II.NeverReduce = NeverReduce; II.MayDeleteFile = MayDeleteFile; II.UniqFeatureSet = FeatureSet; II.HasFocusFunction = HasFocusFunction; diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index 2615014a02153..a13fb03a7fc1c 100644 --- 
a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -671,6 +671,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.Verbosity = Flags.verbosity; Options.MaxLen = Flags.max_len; Options.LenControl = Flags.len_control; + Options.KeepSeed = Flags.keep_seed; Options.UnitTimeoutSec = Flags.timeout; Options.ErrorExitCode = Flags.error_exitcode; Options.TimeoutExitCode = Flags.timeout_exitcode; diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def index 8114791466032..1dc805d4e539d 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -23,6 +23,10 @@ FUZZER_FLAG_INT(len_control, 100, "Try generating small inputs first, " FUZZER_FLAG_STRING(seed_inputs, "A comma-separated list of input files " "to use as an additional seed corpus. Alternatively, an \"@\" followed by " "the name of a file containing the comma-separated list.") +FUZZER_FLAG_INT(keep_seed, 0, "If 1, keep seed inputs in the corpus even if " + "they do not produce new coverage. When used with |reduce_inputs==1|, the " + "seed inputs will never be reduced. 
This option can be useful when seeds are " + "not properly formed for the fuzz target but still have useful snippets.") FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") diff --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp index d9e6b79443e0d..84725d22a9c78 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp @@ -309,11 +309,15 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, else Env.MainCorpusDir = CorpusDirs[0]; - auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); - CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, - {}, &Env.Cov, - CFPath, false); - RemoveFile(CFPath); + if (Options.KeepSeed) { + for (auto &File : SeedFiles) + Env.Files.push_back(File.File); + } else { + auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); + CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, + {}, &Env.Cov, CFPath, false); + RemoveFile(CFPath); + } Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, Env.Files.size(), Env.TempDir.c_str()); diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h index 31096ce804bc1..2b172d9122277 100644 --- a/compiler-rt/lib/fuzzer/FuzzerInternal.h +++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h @@ -67,7 +67,8 @@ class Fuzzer { void ExecuteCallback(const uint8_t *Data, size_t Size); bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, - InputInfo *II = nullptr, bool *FoundUniqFeatures = nullptr); + InputInfo *II = nullptr, bool ForceAddToCorpus = false, + bool *FoundUniqFeatures = nullptr); // Merge Corpora[1:] into Corpora[0].
void Merge(const Vector &Corpora); diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index 02db6d27b0a3e..d76075b0a81a3 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -464,7 +464,8 @@ static void RenameFeatureSetFile(const std::string &FeaturesDir, } bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, - InputInfo *II, bool *FoundUniqFeatures) { + InputInfo *II, bool ForceAddToCorpus, + bool *FoundUniqFeatures) { if (!Size) return false; @@ -478,7 +479,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, UniqFeatureSetTmp.push_back(Feature); if (Options.Entropic) Corpus.UpdateFeatureFrequency(II, Feature); - if (Options.ReduceInputs && II) + if (Options.ReduceInputs && II && !II->NeverReduce) if (std::binary_search(II->UniqFeatureSet.begin(), II->UniqFeatureSet.end(), Feature)) FoundUniqFeaturesOfII++; @@ -487,11 +488,12 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, *FoundUniqFeatures = FoundUniqFeaturesOfII; PrintPulseAndReportSlowInput(Data, Size); size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore; - if (NumNewFeatures) { + if (NumNewFeatures || ForceAddToCorpus) { TPC.UpdateObservedPCs(); - auto NewII = Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, - MayDeleteFile, TPC.ObservedFocusFunction(), - UniqFeatureSetTmp, DFT, II); + auto NewII = + Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile, + TPC.ObservedFocusFunction(), ForceAddToCorpus, + UniqFeatureSetTmp, DFT, II); WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1), NewII->UniqFeatureSet); return true; @@ -700,7 +702,7 @@ void Fuzzer::MutateAndTestOne() { bool FoundUniqFeatures = false; bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II, - &FoundUniqFeatures); + /*ForceAddToCorpus*/ false, &FoundUniqFeatures); TryDetectingAMemoryLeak(CurrentUnitData, 
Size, /*DuringInitialCorpusExecution*/ false); if (NewCov) { @@ -768,7 +770,9 @@ void Fuzzer::ReadAndExecuteSeedCorpora(Vector &CorporaFiles) { for (auto &SF : CorporaFiles) { auto U = FileToVector(SF.File, MaxInputLen, /*ExitOnError=*/false); assert(U.size() <= MaxInputLen); - RunOne(U.data(), U.size()); + RunOne(U.data(), U.size(), /*MayDeleteFile*/ false, /*II*/ nullptr, + /*ForceAddToCorpus*/ Options.KeepSeed, + /*FoundUniqFeatures*/ nullptr); CheckExitOnSrcPosOrItem(); TryDetectingAMemoryLeak(U.data(), U.size(), /*DuringInitialCorpusExecution*/ true); diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h index b75e7c7af7093..26961973d50bb 100644 --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -18,6 +18,7 @@ struct FuzzingOptions { int Verbosity = 1; size_t MaxLen = 0; size_t LenControl = 1000; + bool KeepSeed = false; int UnitTimeoutSec = 300; int TimeoutExitCode = 70; int OOMExitCode = 71; diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp index 0e9435ab8fcb4..93b54f54c19aa 100644 --- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -597,8 +597,10 @@ TEST(Corpus, Distribution) { size_t N = 10; size_t TriesPerUnit = 1<<16; for (size_t i = 0; i < N; i++) - C->AddToCorpus(Unit{static_cast(i)}, 1, false, false, {}, DFT, - nullptr); + C->AddToCorpus(Unit{static_cast(i)}, /*NumFeatures*/ 1, + /*MayDeleteFile*/ false, /*HasFocusFunction*/ false, + /*ForceAddToCorpus*/ false, /*FeatureSet*/ {}, DFT, + /*BaseII*/ nullptr); Vector Hist(N); for (size_t i = 0; i < N * TriesPerUnit; i++) { diff --git a/compiler-rt/test/fuzzer/KeepSeedTest.cpp b/compiler-rt/test/fuzzer/KeepSeedTest.cpp new file mode 100644 index 0000000000000..f343161abde52 --- /dev/null +++ b/compiler-rt/test/fuzzer/KeepSeedTest.cpp @@ -0,0 +1,37 @@ +// Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// Test whether the fuzzer can find "SELECT FROM WHERE", given a seed input +// "SELECTxFROMxWHERE". Without -keep_seed=1, it takes longer time to trigger +// find the desired string, because the seed input is more likely to be reduced +// to a prefix of the given input first, losing useful fragments towards the end +// of the seed input. +#include +#include +#include +#include + +static volatile int Sink = 0; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 17) + return 0; + + if (Size >= 6 && Data[0] == 'S' && Data[1] == 'E' && Data[2] == 'L' && + Data[3] == 'E' && Data[4] == 'C' && Data[5] == 'T') { + if (Size >= 7 && Data[6] == ' ') { + if (Size >= 11 && Data[7] == 'F' && Data[8] == 'R' && Data[9] == 'O' && + Data[10] == 'M') { + if (Size >= 12 && Data[11] == ' ') { + if (Size >= 17 && Data[12] == 'W' && Data[13] == 'H' && + Data[14] == 'E' && Data[15] == 'R' && Data[16] == 'E') { + fprintf(stderr, "BINGO; Found the target, exiting.\n"); + exit(1); + } + } + } + } + } + return 0; +} diff --git a/compiler-rt/test/fuzzer/keep-seed.test b/compiler-rt/test/fuzzer/keep-seed.test new file mode 100644 index 0000000000000..29212ac7c177c --- /dev/null +++ b/compiler-rt/test/fuzzer/keep-seed.test @@ -0,0 +1,17 @@ +REQUIRES: linux, x86_64 +RUN: %cpp_compiler %S/KeepSeedTest.cpp -o %t-KeepSeedTest + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n SELECTxFROMxWHERE > %t-corpus/valid-fragments + +RUN: not %run %t-KeepSeedTest -keep_seed=1 -seed=1 -runs=2000000 %t-corpus 2>&1 | FileCheck %s +CHECK: BINGO + +RUN: rm -rf %t-corpus-baseline +RUN: mkdir %t-corpus-baseline +RUN: echo -n SELECTxFROMxWHERE > %t-corpus-baseline/valid-fragments + +# The following checks whether without -keep_seed=1 libFuzzer does not find the +# crashing input "SELECT FROM WHERE" even with 2x more 
runs. +RUN: %run %t-KeepSeedTest -seed=1 -runs=4000000 %t-corpus-baseline -print_final_stats=1 From ddcc7ce59150c9ebc6b0b2d61e7ef4f2525c11f4 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Thu, 3 Sep 2020 08:57:13 -0700 Subject: [PATCH 125/465] [lldb-vscode] Fix TestVSCode_module Caused by D86662. The fix is only checking some fields when the expect_debug_info_size flag is true. For some reason this was not failing on a local linux machine. --- lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py b/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py index db70e4a8124b6..7fa5f7d45267f 100644 --- a/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py +++ b/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py @@ -38,7 +38,8 @@ def run_test(self, symbol_basename, expect_debug_info_size): def checkSymbolsLoadedWithSize(): active_modules = self.vscode.get_active_modules() program_module = active_modules[program_basename] - symbolsStatus = program_module['debugInfoSize'] + self.assertIn('symbolFilePath', program_module) + self.assertIn(symbols_path, program_module['symbolFilePath']) symbol_regex = re.compile(r"[0-9]+(\.[0-9]*)?[KMG]?B") return symbol_regex.match(program_module['symbolStatus']) @@ -48,8 +49,6 @@ def checkSymbolsLoadedWithSize(): program_module = active_modules[program_basename] self.assertEqual(program_basename, program_module['name']) self.assertEqual(program, program_module['path']) - self.assertIn('symbolFilePath', program_module) - self.assertIn(symbols_path, program_module['symbolFilePath']) self.assertIn('addressRange', program_module) @skipIfWindows From f26fc568402f84a94557cbe86e7aac8319d61387 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 2 Sep 2020 18:56:12 +0200 Subject: [PATCH 126/465] Eliminate the sizing template parameter N from CoalescingBitVector Since the parameter 
is not used anywhere, and the default size of 16 apparently causes PR47359, remove it. This ensures that IntervalMap will automatically determine the optimal size, using its NodeSizer struct. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D87044 --- llvm/include/llvm/ADT/CoalescingBitVector.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h index f8c8fec0ec9e7..0a7dcfe226315 100644 --- a/llvm/include/llvm/ADT/CoalescingBitVector.h +++ b/llvm/include/llvm/ADT/CoalescingBitVector.h @@ -34,15 +34,14 @@ namespace llvm { /// performance for non-sequential find() operations. /// /// \tparam IndexT - The type of the index into the bitvector. -/// \tparam N - The first N coalesced intervals of set bits are stored in-place. -template class CoalescingBitVector { +template class CoalescingBitVector { static_assert(std::is_unsigned::value, "Index must be an unsigned integer."); - using ThisT = CoalescingBitVector; + using ThisT = CoalescingBitVector; /// An interval map for closed integer ranges. The mapped values are unused. - using MapT = IntervalMap; + using MapT = IntervalMap; using UnderlyingIterator = typename MapT::const_iterator; From c9239b2bf5f00b58aaa431955f24013e0cada0a3 Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Thu, 3 Sep 2020 09:28:34 -0700 Subject: [PATCH 127/465] [Analyzer][docs][NFC] Fix typo in code example --- clang/docs/analyzer/checkers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 3b378f735ebcc..7a294f916bcf9 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -1747,7 +1747,7 @@ Check for integer to enumeration casts that could result in undefined values. 
void foo() { TestEnum t = static_cast(-1); // warn: the value provided to the cast expression is not in - the valid range of values for the enum + // the valid range of values for the enum .. _alpha-cplusplus-InvalidatedIterator: From 9bb3a9eebb3b6c2ad32c84e630f7b48076751042 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 3 Sep 2020 11:36:32 -0400 Subject: [PATCH 128/465] [InstCombine] add tests for commutative intrinsics; NFC --- .../InstCombine/commutative-intrinsics.ll | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/commutative-intrinsics.ll diff --git a/llvm/test/Transforms/InstCombine/commutative-intrinsics.ll b/llvm/test/Transforms/InstCombine/commutative-intrinsics.ll new file mode 100644 index 0000000000000..92f85f51b4413 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/commutative-intrinsics.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +define i35 @smax(i35 %x) { +; CHECK-LABEL: @smax( +; CHECK-NEXT: [[R:%.*]] = call i35 @llvm.smax.i35(i35 42, i35 [[X:%.*]]) +; CHECK-NEXT: ret i35 [[R]] +; + %r = call i35 @llvm.smax.i35(i35 42, i35 %x) + ret i35 %r +} + +define i5 @smin(i5 %x) { +; CHECK-LABEL: @smin( +; CHECK-NEXT: [[R:%.*]] = call i5 @llvm.smin.i5(i5 10, i5 [[X:%.*]]) +; CHECK-NEXT: ret i5 [[R]] +; + %r = call i5 @llvm.smin.i5(i5 42, i5 %x) + ret i5 %r +} + +define <2 x i35> @umax(<2 x i35> %x) { +; CHECK-LABEL: @umax( +; CHECK-NEXT: [[R:%.*]] = call <2 x i35> @llvm.umax.v2i35(<2 x i35> , <2 x i35> [[X:%.*]]) +; CHECK-NEXT: ret <2 x i35> [[R]] +; + %r = call <2 x i35> @llvm.umax.v2i35(<2 x i35> , <2 x i35> %x) + ret <2 x i35> %r +} + +define <3 x i35> @umin(<3 x i35> %x) { +; CHECK-LABEL: @umin( +; CHECK-NEXT: [[R:%.*]] = call <3 x i35> @llvm.umin.v3i35(<3 x i35> , <3 x i35> [[X:%.*]]) +; CHECK-NEXT: ret <3 x i35> [[R]] +; + %r = call <3 x i35> @llvm.umin.v3i35(<3 x i35> , <3 x i35> 
%x) + ret <3 x i35> %r +} + +define i35 @smul_fix(i35 %x) { +; CHECK-LABEL: @smul_fix( +; CHECK-NEXT: [[R:%.*]] = call i35 @llvm.smul.fix.i35(i35 42, i35 [[X:%.*]], i32 2) +; CHECK-NEXT: ret i35 [[R]] +; + %r = call i35 @llvm.smul.fix.i35(i35 42, i35 %x, i32 2) + ret i35 %r +} + +define i5 @umul_fix(i5 %x) { +; CHECK-LABEL: @umul_fix( +; CHECK-NEXT: [[R:%.*]] = call i5 @llvm.umul.fix.i5(i5 10, i5 [[X:%.*]], i32 3) +; CHECK-NEXT: ret i5 [[R]] +; + %r = call i5 @llvm.umul.fix.i5(i5 42, i5 %x, i32 3) + ret i5 %r +} + +define <2 x i35> @smul_fix_sat(<2 x i35> %x) { +; CHECK-LABEL: @smul_fix_sat( +; CHECK-NEXT: [[R:%.*]] = call <2 x i35> @llvm.smul.fix.sat.v2i35(<2 x i35> , <2 x i35> [[X:%.*]], i32 4) +; CHECK-NEXT: ret <2 x i35> [[R]] +; + %r = call <2 x i35> @llvm.smul.fix.sat.v2i35(<2 x i35> , <2 x i35> %x, i32 4) + ret <2 x i35> %r +} + +define <3 x i35> @umul_fix_sat(<3 x i35> %x) { +; CHECK-LABEL: @umul_fix_sat( +; CHECK-NEXT: [[R:%.*]] = call <3 x i35> @llvm.umul.fix.sat.v3i35(<3 x i35> , <3 x i35> [[X:%.*]], i32 5) +; CHECK-NEXT: ret <3 x i35> [[R]] +; + %r = call <3 x i35> @llvm.umul.fix.sat.v3i35(<3 x i35> , <3 x i35> %x, i32 5) + ret <3 x i35> %r +} + +declare i35 @llvm.smax.i35(i35, i35) +declare i5 @llvm.smin.i5(i5, i5) +declare <2 x i35> @llvm.umax.v2i35(<2 x i35>, <2 x i35>) +declare <3 x i35> @llvm.umin.v3i35(<3 x i35>, <3 x i35>) +declare i35 @llvm.smul.fix.i35(i35, i35, i32) +declare i5 @llvm.umul.fix.i5(i5, i5, i32) +declare <2 x i35> @llvm.smul.fix.sat.v2i35(<2 x i35>, <2 x i35>, i32) +declare <3 x i35> @llvm.umul.fix.sat.v3i35(<3 x i35>, <3 x i35>, i32) From 2391a34f9f529705a9c7761df350e7f012cca191 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 3 Sep 2020 12:27:19 -0400 Subject: [PATCH 129/465] [InstCombine] canonicalize all commutative intrinsics with constant arg --- .../InstCombine/InstCombineCalls.cpp | 21 ++++++------------- .../InstCombine/commutative-intrinsics.ll | 16 +++++++------- 2 files changed, 14 insertions(+), 23 deletions(-) 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 995d0b6a8db71..dd4e48170f028 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -622,7 +622,7 @@ Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) { return nullptr; } -static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) { +static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) { assert(Call.getNumArgOperands() > 1 && "Need at least 2 args to swap"); Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1); if (isa(Arg0) && !isa(Arg1)) { @@ -763,6 +763,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } + if (II->isCommutative()) { + if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI)) + return NewCall; + } + Intrinsic::ID IID = II->getIntrinsicID(); switch (IID) { case Intrinsic::objectsize: @@ -905,8 +910,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: { - if (Instruction *I = canonicalizeConstantArg0ToArg1(CI)) - return I; if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) return I; @@ -934,10 +937,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: - if (Instruction *I = canonicalizeConstantArg0ToArg1(CI)) - return I; - LLVM_FALLTHROUGH; - case Intrinsic::usub_with_overflow: if (Instruction *I = foldIntrinsicWithOverflowCommon(II)) return I; @@ -968,9 +967,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { case Intrinsic::uadd_sat: case Intrinsic::sadd_sat: - if (Instruction *I = canonicalizeConstantArg0ToArg1(CI)) - return I; - LLVM_FALLTHROUGH; case Intrinsic::usub_sat: case Intrinsic::ssub_sat: { SaturatingInst *SI = cast(II); @@ -1051,8 +1047,6 @@ Instruction 
*InstCombinerImpl::visitCallInst(CallInst &CI) { case Intrinsic::maxnum: case Intrinsic::minimum: case Intrinsic::maximum: { - if (Instruction *I = canonicalizeConstantArg0ToArg1(CI)) - return I; Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); Value *X, *Y; @@ -1161,9 +1155,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { LLVM_FALLTHROUGH; } case Intrinsic::fma: { - if (Instruction *I = canonicalizeConstantArg0ToArg1(CI)) - return I; - // fma fneg(x), fneg(y), z -> fma x, y, z Value *Src0 = II->getArgOperand(0); Value *Src1 = II->getArgOperand(1); diff --git a/llvm/test/Transforms/InstCombine/commutative-intrinsics.ll b/llvm/test/Transforms/InstCombine/commutative-intrinsics.ll index 92f85f51b4413..3622904fa07d7 100644 --- a/llvm/test/Transforms/InstCombine/commutative-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/commutative-intrinsics.ll @@ -3,7 +3,7 @@ define i35 @smax(i35 %x) { ; CHECK-LABEL: @smax( -; CHECK-NEXT: [[R:%.*]] = call i35 @llvm.smax.i35(i35 42, i35 [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = call i35 @llvm.smax.i35(i35 [[X:%.*]], i35 42) ; CHECK-NEXT: ret i35 [[R]] ; %r = call i35 @llvm.smax.i35(i35 42, i35 %x) @@ -12,7 +12,7 @@ define i35 @smax(i35 %x) { define i5 @smin(i5 %x) { ; CHECK-LABEL: @smin( -; CHECK-NEXT: [[R:%.*]] = call i5 @llvm.smin.i5(i5 10, i5 [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = call i5 @llvm.smin.i5(i5 [[X:%.*]], i5 10) ; CHECK-NEXT: ret i5 [[R]] ; %r = call i5 @llvm.smin.i5(i5 42, i5 %x) @@ -21,7 +21,7 @@ define i5 @smin(i5 %x) { define <2 x i35> @umax(<2 x i35> %x) { ; CHECK-LABEL: @umax( -; CHECK-NEXT: [[R:%.*]] = call <2 x i35> @llvm.umax.v2i35(<2 x i35> , <2 x i35> [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x i35> @llvm.umax.v2i35(<2 x i35> [[X:%.*]], <2 x i35> ) ; CHECK-NEXT: ret <2 x i35> [[R]] ; %r = call <2 x i35> @llvm.umax.v2i35(<2 x i35> , <2 x i35> %x) @@ -30,7 +30,7 @@ define <2 x i35> @umax(<2 x i35> %x) { define <3 x i35> @umin(<3 x i35> %x) { ; CHECK-LABEL: @umin( -; 
CHECK-NEXT: [[R:%.*]] = call <3 x i35> @llvm.umin.v3i35(<3 x i35> , <3 x i35> [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = call <3 x i35> @llvm.umin.v3i35(<3 x i35> [[X:%.*]], <3 x i35> ) ; CHECK-NEXT: ret <3 x i35> [[R]] ; %r = call <3 x i35> @llvm.umin.v3i35(<3 x i35> , <3 x i35> %x) @@ -39,7 +39,7 @@ define <3 x i35> @umin(<3 x i35> %x) { define i35 @smul_fix(i35 %x) { ; CHECK-LABEL: @smul_fix( -; CHECK-NEXT: [[R:%.*]] = call i35 @llvm.smul.fix.i35(i35 42, i35 [[X:%.*]], i32 2) +; CHECK-NEXT: [[R:%.*]] = call i35 @llvm.smul.fix.i35(i35 [[X:%.*]], i35 42, i32 2) ; CHECK-NEXT: ret i35 [[R]] ; %r = call i35 @llvm.smul.fix.i35(i35 42, i35 %x, i32 2) @@ -48,7 +48,7 @@ define i35 @smul_fix(i35 %x) { define i5 @umul_fix(i5 %x) { ; CHECK-LABEL: @umul_fix( -; CHECK-NEXT: [[R:%.*]] = call i5 @llvm.umul.fix.i5(i5 10, i5 [[X:%.*]], i32 3) +; CHECK-NEXT: [[R:%.*]] = call i5 @llvm.umul.fix.i5(i5 [[X:%.*]], i5 10, i32 3) ; CHECK-NEXT: ret i5 [[R]] ; %r = call i5 @llvm.umul.fix.i5(i5 42, i5 %x, i32 3) @@ -57,7 +57,7 @@ define i5 @umul_fix(i5 %x) { define <2 x i35> @smul_fix_sat(<2 x i35> %x) { ; CHECK-LABEL: @smul_fix_sat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i35> @llvm.smul.fix.sat.v2i35(<2 x i35> , <2 x i35> [[X:%.*]], i32 4) +; CHECK-NEXT: [[R:%.*]] = call <2 x i35> @llvm.smul.fix.sat.v2i35(<2 x i35> [[X:%.*]], <2 x i35> , i32 4) ; CHECK-NEXT: ret <2 x i35> [[R]] ; %r = call <2 x i35> @llvm.smul.fix.sat.v2i35(<2 x i35> , <2 x i35> %x, i32 4) @@ -66,7 +66,7 @@ define <2 x i35> @smul_fix_sat(<2 x i35> %x) { define <3 x i35> @umul_fix_sat(<3 x i35> %x) { ; CHECK-LABEL: @umul_fix_sat( -; CHECK-NEXT: [[R:%.*]] = call <3 x i35> @llvm.umul.fix.sat.v3i35(<3 x i35> , <3 x i35> [[X:%.*]], i32 5) +; CHECK-NEXT: [[R:%.*]] = call <3 x i35> @llvm.umul.fix.sat.v3i35(<3 x i35> [[X:%.*]], <3 x i35> , i32 5) ; CHECK-NEXT: ret <3 x i35> [[R]] ; %r = call <3 x i35> @llvm.umul.fix.sat.v3i35(<3 x i35> , <3 x i35> %x, i32 5) From b196c7192ff3d3700e77691bdc13fc14b0f46502 Mon Sep 17 00:00:00 2001 From: 
Simon Pilgrim Date: Thu, 3 Sep 2020 16:34:46 +0100 Subject: [PATCH 130/465] Fix Wdocumentation warning. NFCI. Remove \returns tag from a void function --- llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index a0e85c82868fb..cfaec85d3f3dd 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -1313,7 +1313,6 @@ class InstrRefBasedLDV : public LDVImpl { bool transferSpillOrRestoreInst(MachineInstr &MI); /// Examines \p MI for any registers that it defines, and notifies trackers. - /// \returns true if MI was recognized and processed. void transferRegisterDef(MachineInstr &MI); /// Copy one location to the other, accounting for movement of subregisters From 6731eb644a1ef5650c1c3b1757a4f94a21198085 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 16:42:05 +0100 Subject: [PATCH 131/465] Fix Wdocumentation trailing comments warnings. NFCI. --- llvm/lib/CodeGen/EarlyIfConversion.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index 517b2cd25fc48..a580d3cc5785c 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -866,8 +866,8 @@ bool EarlyIfConverter::shouldConvertIf() { // by inserting select instructions. MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); struct CriticalPathInfo { - unsigned Extra; //< Count of extra cycles that the component adds. - unsigned Depth; //< Absolute depth of the component in cycles. + unsigned Extra; // Count of extra cycles that the component adds. + unsigned Depth; // Absolute depth of the component in cycles. 
}; CriticalPathInfo Cond{}; CriticalPathInfo TBlock{}; From 46780cc0ee9a89115ac28a0aa8c26eca3a5b7653 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 16:47:21 +0100 Subject: [PATCH 132/465] PHIEliminationUtils.cpp - remove unnecessary MachineBasicBlock.h include. NFCI. This is already included in PHIEliminationUtils.h --- llvm/lib/CodeGen/PHIEliminationUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/llvm/lib/CodeGen/PHIEliminationUtils.cpp index bae96eb84521a..a3ae099199845 100644 --- a/llvm/lib/CodeGen/PHIEliminationUtils.cpp +++ b/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -8,9 +8,9 @@ #include "PHIEliminationUtils.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" + using namespace llvm; // findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg From 83ca548fcb45890fdafadf7a314ddfaa7a9b79a6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 17:05:17 +0100 Subject: [PATCH 133/465] WebAssemblyUtilities.h - reduce unnecessary includes to forward declarations. NFCI. 
--- llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h index 337077178557d..8fa794c0b932e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h @@ -15,11 +15,10 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" - namespace llvm { +class MachineInstr; +class MachineOperand; class WebAssemblyFunctionInfo; namespace WebAssembly { From dc8d7d23d8d25e1ba4859c831151158ae6a617ef Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 3 Sep 2020 17:52:37 +0100 Subject: [PATCH 134/465] [ARM] Extra predicate load tests. NFC --- .../test/CodeGen/Thumb2/mve-pred-loadstore.ll | 286 ++++++++++++++++++ 1 file changed, 286 insertions(+) diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll index fffafa16c6da2..b2ef0d41be6bf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll @@ -357,3 +357,289 @@ entry: store <2 x i1> %c, <2 x i1>* %dst ret void } + +define arm_aapcs_vfpcc <4 x i32> @load_predcastzext(i16* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_predcastzext: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldrh r0, [r0] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_predcastzext: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldrh r0, [r0] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %l = load i16, i16* %i, align 4 + %lz = zext i16 %l 
to i32 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %lz) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %l = load i32, i32* %i, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <8 x i16> @load_predcast8(i32* %i, <8 x i16> %a) { +; CHECK-LE-LABEL: load_predcast8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_predcast8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vrev32.16 q0, q0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.16 q0, q1 +; CHECK-BE-NEXT: bx lr + %l = load i32, i32* %i, align 4 + %c = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %l) + %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer + ret <8 x i16> %s +} + +define arm_aapcs_vfpcc <16 x i8> @load_predcast16(i32* %i, <16 x i8> %a) { +; CHECK-LE-LABEL: load_predcast16: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; 
CHECK-BE-LABEL: load_predcast16: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.8 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.8 q0, q1 +; CHECK-BE-NEXT: bx lr + %l = load i32, i32* %i, align 4 + %c = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %l) + %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer + ret <16 x i8> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_align2(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_align2: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_align2: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %l = load i32, i32* %i, align 2 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_offset(i16* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_offset: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr.w r0, [r0, #6] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_offset: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr.w r0, [r0, #6] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i16, i16* %i, i32 3 + %gb = bitcast i16* %g to i32* + %l = load i32, i32* %gb, align 4 + %c = tail call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_range4(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_range4: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r0, [r0, #4] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_range4: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr r0, [r0, #4] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i32, i32* %i, i32 1 + %l = load i32, i32* %g, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_range(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_range: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr.w r0, [r0, #508] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_range: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr.w r0, [r0, #508] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i32, i32* %i, i32 127 + %l = load i32, i32* %g, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_range2(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_range2: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: movw r1, #65028 +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: 
movt r1, #65535 +; CHECK-LE-NEXT: ldr r0, [r0, r1] +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_range2: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: movw r1, #65028 +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movt r1, #65535 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: ldr r0, [r0, r1] +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i32, i32* %i, i32 -127 + %l = load i32, i32* %g, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_range3(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_range3: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr.w r0, [r0, #512] +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_range3: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr.w r0, [r0, #512] +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i32, i32* %i, i32 128 + %l = load i32, i32* %g, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +define arm_aapcs_vfpcc <4 x i32> @load_bc4_range5(i32* %i, <4 x i32> %a) { +; CHECK-LE-LABEL: load_bc4_range5: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: movw r1, #65024 +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: movt r1, #65535 +; CHECK-LE-NEXT: ldr r0, [r0, r1] +; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: load_bc4_range5: +; CHECK-BE: @ %bb.0: +; 
CHECK-BE-NEXT: movw r1, #65024 +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movt r1, #65535 +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: ldr r0, [r0, r1] +; CHECK-BE-NEXT: vmsr p0, r0 +; CHECK-BE-NEXT: vpsel q1, q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr + %g = getelementptr inbounds i32, i32* %i, i32 -128 + %l = load i32, i32* %g, align 4 + %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l) + %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer + ret <4 x i32> %s +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) From db574fc7ba2a2faafdbf42f8759e206dac0fc4b2 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Thu, 3 Sep 2020 18:11:29 +0100 Subject: [PATCH 135/465] [libc++] Make ext-int.verify.cpp test compatible with c++03 Currently the libcxx/atomics/ext-int.verify.cpp test fails when run with -std=c++03 because there's an extra error due to using list initialization. Fix this by using parentheses instead. 
--- libcxx/test/libcxx/atomics/ext-int.verify.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/libcxx/atomics/ext-int.verify.cpp b/libcxx/test/libcxx/atomics/ext-int.verify.cpp index 7cedcf1995617..1892293323e15 100644 --- a/libcxx/test/libcxx/atomics/ext-int.verify.cpp +++ b/libcxx/test/libcxx/atomics/ext-int.verify.cpp @@ -20,7 +20,7 @@ int main(int, char**) { // expected-error@atomic:*1 {{_Atomic cannot be applied to integer type '_ExtInt(32)'}} - std::atomic<_ExtInt(32)> x {42}; + std::atomic<_ExtInt(32)> x(42); return 0; } From d0706cfc4c6715243c32fd2dd98c6a0a0ed92f38 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 3 Sep 2020 13:27:58 -0400 Subject: [PATCH 136/465] [libc++] Mark std::atomic<_ExtInt> test as unsupported in C++03 --- libcxx/test/libcxx/atomics/ext-int.verify.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcxx/test/libcxx/atomics/ext-int.verify.cpp b/libcxx/test/libcxx/atomics/ext-int.verify.cpp index 1892293323e15..39fa574da8064 100644 --- a/libcxx/test/libcxx/atomics/ext-int.verify.cpp +++ b/libcxx/test/libcxx/atomics/ext-int.verify.cpp @@ -15,6 +15,8 @@ // UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8, clang-9, clang-10 // UNSUPPORTED: apple-clang-9, apple-clang-10, apple-clang-11, apple-clang-12 +// UNSUPPORTED: c++03 + #include int main(int, char**) From 8d420fb3a02d8ef61e43936c1e63d5556684b282 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 3 Sep 2020 13:29:14 -0400 Subject: [PATCH 137/465] [spirv][nfc] Simplify resource limit with default values These deafult values are gotten from Vulkan required limits. 
Reviewed By: hanchung Differential Revision: https://reviews.llvm.org/D87090 --- .../mlir/Dialect/SPIRV/TargetAndABI.td | 14 ++- mlir/test/Conversion/GPUToSPIRV/if.mlir | 4 +- .../Conversion/GPUToSPIRV/load-store.mlir | 4 +- mlir/test/Conversion/GPUToSPIRV/loop.mlir | 4 +- .../GPUToSPIRV/module-structure-opencl.mlir | 5 +- .../LinalgToSPIRV/linalg-to-spirv.mlir | 24 +--- .../Conversion/StandardToSPIRV/alloc.mlir | 32 ++--- .../StandardToSPIRV/std-ops-to-spirv.mlir | 50 ++------ .../StandardToSPIRV/std-types-to-spirv.mlir | 112 ++++-------------- .../Transforms/abi-interface-opencl.mlir | 5 +- .../SPIRV/Transforms/abi-interface.mlir | 4 +- .../SPIRV/Transforms/abi-load-store.mlir | 4 +- .../SPIRV/Transforms/vce-deduction.mlir | 47 ++------ mlir/test/Dialect/SPIRV/target-and-abi.mlir | 9 -- mlir/test/Dialect/SPIRV/target-env.mlir | 32 ++--- mlir/test/mlir-vulkan-runner/addf.mlir | 4 +- mlir/test/mlir-vulkan-runner/addi.mlir | 4 +- mlir/test/mlir-vulkan-runner/addi8.mlir | 4 +- mlir/test/mlir-vulkan-runner/mulf.mlir | 4 +- mlir/test/mlir-vulkan-runner/subf.mlir | 4 +- mlir/test/mlir-vulkan-runner/time.mlir | 4 +- 21 files changed, 98 insertions(+), 276 deletions(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td index 231ec54f09f43..04fcc8e0b53ed 100644 --- a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td +++ b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td @@ -39,12 +39,16 @@ def SPV_CapabilityArrayAttr : TypedArrayAttrBase< // This attribute specifies the limits for various resources on the target // architecture. // -// See https://renderdoc.org/vkspec_chunked/chap36.html#limits for the complete -// list of limits and their explanation for the Vulkan API. The following ones -// are those affecting SPIR-V CodeGen. +// See https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#limits +// for the complete list of limits and their explanation for the Vulkan API. 
+// The following ones are those affecting SPIR-V CodeGen. Their default value +// are the from Vulkan limit requirements: +// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#limits-minmax def SPV_ResourceLimitsAttr : StructAttr<"ResourceLimitsAttr", SPIRV_Dialect, [ - StructFieldAttr<"max_compute_workgroup_invocations", I32Attr>, - StructFieldAttr<"max_compute_workgroup_size", I32ElementsAttr> + StructFieldAttr<"max_compute_workgroup_invocations", + DefaultValuedAttr>, + StructFieldAttr<"max_compute_workgroup_size", + DefaultValuedAttr> ]>; #endif // SPIRV_TARGET_AND_ABI diff --git a/mlir/test/Conversion/GPUToSPIRV/if.mlir b/mlir/test/Conversion/GPUToSPIRV/if.mlir index b7e11d74996bd..9651946118a67 100644 --- a/mlir/test/Conversion/GPUToSPIRV/if.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/if.mlir @@ -3,9 +3,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @main(%arg0 : memref<10xf32>, %arg1 : i1) { %c0 = constant 1 : index diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir index da57db15bedce..b9ae8bdfeacdc 100644 --- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir @@ -3,9 +3,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @load_store(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>) { %c0 = constant 0 : index diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir index 2205c60f875f5..c181e1956f83a 100644 --- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir +++ 
b/mlir/test/Conversion/GPUToSPIRV/loop.mlir @@ -3,9 +3,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @loop(%arg0 : memref<10xf32>, %arg1 : memref<10xf32>) { %c0 = constant 1 : index diff --git a/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir b/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir index 1b5b4d52d8b88..0e2a45f9bf3cb 100644 --- a/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir @@ -2,10 +2,7 @@ module attributes { gpu.container_module, - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { gpu.module @kernels { // CHECK-LABEL: spv.module @{{.*}} Physical64 OpenCL diff --git a/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir b/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir index cebd541977ef1..d437ab160b927 100644 --- a/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir +++ b/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir @@ -16,11 +16,7 @@ module attributes { spv.target_env = #spv.target_env< - #spv.vce, - { - max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32> - }> + #spv.vce, {}> } { // CHECK: spv.globalVariable @@ -78,11 +74,7 @@ func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) module attributes { spv.target_env = #spv.target_env< - #spv.vce, - { - max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32> - }> + #spv.vce, {}> } { func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) { // 
expected-error @+1 {{failed to legalize operation 'linalg.generic'}} @@ -111,11 +103,7 @@ func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) module attributes { spv.target_env = #spv.target_env< - #spv.vce, - { - max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32> - }> + #spv.vce, {}> } { func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) attributes { spv.entry_point_abi = {local_size = dense<[32, 1, 1]>: vector<3xi32>} @@ -146,11 +134,7 @@ func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) module attributes { spv.target_env = #spv.target_env< - #spv.vce, - { - max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32> - }> + #spv.vce, {}> } { func @single_workgroup_reduction(%input: memref<16x8xi32>, %output: memref<16xi32>) attributes { spv.entry_point_abi = {local_size = dense<[16, 8, 1]>: vector<3xi32>} diff --git a/mlir/test/Conversion/StandardToSPIRV/alloc.mlir b/mlir/test/Conversion/StandardToSPIRV/alloc.mlir index 14ce4699a4550..ccd8c02e255ac 100644 --- a/mlir/test/Conversion/StandardToSPIRV/alloc.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/alloc.mlir @@ -6,9 +6,7 @@ module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_workgroup_mem(%arg0 : index, %arg1 : index) { @@ -34,9 +32,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_workgroup_mem(%arg0 : index, %arg1 : index) { @@ -65,9 +61,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - 
{max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @two_allocs() { @@ -88,9 +82,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @two_allocs_vector() { @@ -112,9 +104,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_dynamic_workgroup_mem(%arg0 : index) { @@ -129,9 +119,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_mem() { @@ -146,9 +134,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_dynamic_workgroup_mem(%arg0 : memref<4x?xf32, 3>) { @@ -163,9 +149,7 @@ module attributes { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { func @alloc_dealloc_mem(%arg0 : memref<4x5xf32>) { diff --git a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir index 1b83af1be7551..ce38ba8b3f5e6 100644 --- a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir @@ -6,9 +6,7 @@ module attributes { spv.target_env = 
#spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // Check integer operation conversions. @@ -146,10 +144,7 @@ func @unsupported_2x2elem_vector(%arg0: vector<2x2xi32>) { // Check that types are converted to 32-bit when no special capabilities. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: @int_vector23 @@ -177,10 +172,7 @@ func @float_scalar(%arg0: f16, %arg1: f64) { // Check that types are converted to 32-bit when no special capabilities that // are not supported. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { func @int_vector4_invalid(%arg0: vector<4xi64>) { @@ -199,10 +191,7 @@ func @int_vector4_invalid(%arg0: vector<4xi64>) { //===----------------------------------------------------------------------===// module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: @bitwise_scalar @@ -348,9 +337,7 @@ func @boolcmpi(%arg0 : i1, %arg1 : i1) { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: @constant @@ -412,10 +399,7 @@ func @constant_64bit() { // Check that constants are converted to 32-bit when no special capability. 
module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: @constant_16bit @@ -498,9 +482,7 @@ func @unsupported_cases() { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: index_cast1 @@ -631,10 +613,7 @@ func @fptosi2(%arg0 : f16) -> i16 { // Checks that cast types will be adjusted when no special capabilities for // non-32-bit scalar types. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: @fpext1 @@ -682,9 +661,8 @@ func @sitofp(%arg0 : i64) { module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { //===----------------------------------------------------------------------===// @@ -750,9 +728,7 @@ func @load_store_zero_rank_int(%arg0: memref, %arg1: memref) { // TODO: Test i1 and i64 types. 
module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: @load_i8 @@ -895,9 +871,7 @@ func @store_f32(%arg0: memref, %value: f32) { module attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + [SPV_KHR_storage_buffer_storage_class, SPV_KHR_16bit_storage]>, {}> } { // CHECK-LABEL: @load_i8 diff --git a/mlir/test/Conversion/StandardToSPIRV/std-types-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-types-to-spirv.mlir index 5ea44c18c6183..66b2ba97bea1e 100644 --- a/mlir/test/Conversion/StandardToSPIRV/std-types-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/std-types-to-spirv.mlir @@ -7,10 +7,7 @@ // Check that non-32-bit integer types are converted to 32-bit types if the // corresponding capabilities are not available. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @integer8 @@ -38,10 +35,7 @@ func @integer64(%arg0: i64, %arg1: si64, %arg2: ui64) { return } // Check that non-32-bit integer types are kept untouched if the corresponding // capabilities are available. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @integer8 @@ -68,10 +62,7 @@ func @integer64(%arg0: i64, %arg1: si64, %arg2: ui64) { return } // Check that weird bitwidths are not supported. 
module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-NOT: spv.func @integer4 @@ -92,10 +83,7 @@ func @integer42(%arg0: i42) { return } // The index type is always converted into i32. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @index_type @@ -113,10 +101,7 @@ func @index_type(%arg0: index) { return } // Check that non-32-bit float types are converted to 32-bit types if the // corresponding capabilities are not available. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @float16 @@ -134,10 +119,7 @@ func @float64(%arg0: f64) { return } // Check that non-32-bit float types are kept untouched if the corresponding // capabilities are available. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @float16 @@ -154,10 +136,7 @@ func @float64(%arg0: f64) { return } // Check that bf16 is not supported. 
module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-NOT: spv.func @bf16_type @@ -174,10 +153,7 @@ func @bf16_type(%arg0: bf16) { return } // Check that capabilities for scalar types affects vector types too: no special // capabilities available means using turning element types to 32-bit. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @int_vector @@ -206,9 +182,7 @@ func @float_vector( // special capabilities means keep vector types untouched. module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.func @int_vector @@ -235,10 +209,7 @@ func @float_vector( // Check that 1- or > 4-element vectors are not supported. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-NOT: spv.func @one_element_vector @@ -258,9 +229,7 @@ func @large_vector(%arg0: vector<1024xi32>) { return } // Check memory spaces. module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: func @memref_mem_space @@ -285,10 +254,7 @@ func @memref_mem_space( // Check that boolean memref is not supported at the moment. 
module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: func @memref_type({{%.*}}: memref<3xi1>) @@ -304,10 +270,7 @@ func @memref_type(%arg0: memref<3xi1>) { // requires special capability and extension: convert them to 32-bit if not // satisfied. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @memref_8bit_StorageBuffer @@ -352,9 +315,7 @@ func @memref_16bit_Output(%arg4: memref<16xf16, 10>) { return } module attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + [SPV_KHR_8bit_storage, SPV_KHR_16bit_storage]>, {}> } { // CHECK-LABEL: spv.func @memref_8bit_PushConstant @@ -379,9 +340,7 @@ func @memref_16bit_PushConstant( module attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + [SPV_KHR_8bit_storage, SPV_KHR_16bit_storage]>, {}> } { // CHECK-LABEL: spv.func @memref_8bit_StorageBuffer @@ -406,9 +365,7 @@ func @memref_16bit_StorageBuffer( module attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + [SPV_KHR_8bit_storage, SPV_KHR_16bit_storage]>, {}> } { // CHECK-LABEL: spv.func @memref_8bit_Uniform @@ -432,9 +389,7 @@ func @memref_16bit_Uniform( // and extension is available. 
module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.func @memref_16bit_Input @@ -452,9 +407,7 @@ func @memref_16bit_Output(%arg4: memref<16xi16, 10>) { return } // Check that memref offset and strides affect the array size. module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.func @memref_offset_strides @@ -488,10 +441,7 @@ func @memref_offset_strides( // Dynamic shapes module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // Check that unranked shapes are not supported. @@ -512,10 +462,7 @@ func @dynamic_dim_memref(%arg0: memref<8x?xi32>, // Vector types module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: func @memref_vector @@ -539,10 +486,7 @@ func @dynamic_dim_memref_vector(%arg0: memref<8x?xvector<4xi32>>, // Vector types, check that sizes not available in SPIR-V are not transformed. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: func @memref_vector_wrong_size @@ -562,9 +506,7 @@ func @memref_vector_wrong_size( // Check that tensor element types are kept untouched with proper capabilities. 
module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.func @int_tensor_types @@ -595,10 +537,7 @@ func @float_tensor_types( // Check that tensor element types are changed to 32-bit without capabilities. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: spv.func @int_tensor_types @@ -629,10 +568,7 @@ func @float_tensor_types( // Check that dynamic shapes are not supported. module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK-LABEL: func @unranked_tensor diff --git a/mlir/test/Dialect/SPIRV/Transforms/abi-interface-opencl.mlir b/mlir/test/Dialect/SPIRV/Transforms/abi-interface-opencl.mlir index 54b810f43aec3..1de6b71d888d1 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/abi-interface-opencl.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/abi-interface-opencl.mlir @@ -1,10 +1,7 @@ // RUN: mlir-opt -spirv-lower-abi-attrs -verify-diagnostics %s -o - | FileCheck %s module attributes { - spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { spv.module Physical64 OpenCL { // CHECK-LABEL: spv.module diff --git a/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir b/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir index 28c44bf7b936d..5b06745eba874 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir +++ 
b/mlir/test/Dialect/SPIRV/Transforms/abi-interface.mlir @@ -2,9 +2,7 @@ module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.module diff --git a/mlir/test/Dialect/SPIRV/Transforms/abi-load-store.mlir b/mlir/test/Dialect/SPIRV/Transforms/abi-load-store.mlir index 3d37f35b1c466..7d1a174fa3671 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/abi-load-store.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/abi-load-store.mlir @@ -2,9 +2,7 @@ module attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { // CHECK-LABEL: spv.module diff --git a/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir b/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir index 572db88e5f9ec..74484fd7ab6b1 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir @@ -10,9 +10,7 @@ // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd(%val : i32) -> i32 "None" { %0 = spv.IAdd %val, %val: i32 @@ -26,9 +24,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @group_non_uniform_ballot(%predicate : i1) -> vector<4xi32> "None" { %0 = spv.GroupNonUniformBallot "Workgroup" %predicate : vector<4xi32> @@ -45,9 +41,7 @@ spv.module Logical GLSL450 attributes { // 
CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd(%val : i32) -> i32 "None" { %0 = spv.IAdd %val, %val: i32 @@ -61,9 +55,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd(%val : i32) -> i32 "None" { %0 = spv.IAdd %val, %val: i32 @@ -84,9 +76,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @group_non_uniform_iadd(%val : i32) -> i32 "None" { %0 = spv.GroupNonUniformIAdd "Subgroup" "Reduce" %val : i32 @@ -97,9 +87,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @group_non_uniform_iadd(%val : i32) -> i32 "None" { %0 = spv.GroupNonUniformIAdd "Subgroup" "Reduce" %val : i32 @@ -113,9 +101,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd_function(%val : i8) -> i8 "None" { %0 = spv.IAdd %val, %val : i8 @@ -127,9 +113,7 @@ 
spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @fadd_function(%val : f16) -> f16 "None" { %0 = spv.FAdd %val, %val : f16 @@ -148,9 +132,7 @@ spv.module Logical GLSL450 attributes { spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + [SPV_KHR_shader_ballot, SPV_KHR_shader_clock, SPV_KHR_variable_pointers]>, {}> } { spv.func @subgroup_ballot(%predicate : i1) -> vector<4xi32> "None" { %0 = spv.SubgroupBallotKHR %predicate: vector<4xi32> @@ -165,9 +147,7 @@ spv.module Logical GLSL450 attributes { // CHECK: requires #spv.vce spv.module Logical Vulkan attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd(%val : i32) -> i32 "None" { %0 = spv.IAdd %val, %val: i32 @@ -182,9 +162,7 @@ spv.module Logical Vulkan attributes { // CHECK: requires #spv.vce spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { spv.func @iadd_storage_buffer(%ptr : !spv.ptr) -> i16 "None" { %0 = spv.Load "StorageBuffer" %ptr : i16 @@ -200,8 +178,7 @@ spv.module Logical GLSL450 attributes { spv.module Logical GLSL450 attributes { spv.target_env = #spv.target_env< #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + {}> } { spv.globalVariable @data : !spv.ptr, Uniform> spv.globalVariable @img 
: !spv.ptr, UniformConstant> diff --git a/mlir/test/Dialect/SPIRV/target-and-abi.mlir b/mlir/test/Dialect/SPIRV/target-and-abi.mlir index 8d11f4ca0c642..cd338752600ab 100644 --- a/mlir/test/Dialect/SPIRV/target-and-abi.mlir +++ b/mlir/test/Dialect/SPIRV/target-and-abi.mlir @@ -104,15 +104,6 @@ func @interface_var( // spv.target_env //===----------------------------------------------------------------------===// -func @target_env_missing_limits() attributes { - spv.target_env = #spv.target_env< - #spv.vce, - // expected-error @+1 {{limits must be a dictionary attribute containing two 32-bit integer attributes 'max_compute_workgroup_invocations' and 'max_compute_workgroup_size'}} - {max_compute_workgroup_size = dense<[128, 64, 64]> : vector<3xi32>}> -} { return } - -// ----- - func @target_env_wrong_limits() attributes { spv.target_env = #spv.target_env< #spv.vce, diff --git a/mlir/test/Dialect/SPIRV/target-env.mlir b/mlir/test/Dialect/SPIRV/target-env.mlir index 27c4e8d04092b..c0bc02fae0894 100644 --- a/mlir/test/Dialect/SPIRV/target-env.mlir +++ b/mlir/test/Dialect/SPIRV/target-env.mlir @@ -35,7 +35,7 @@ // CHECK-LABEL: @cmp_exchange_weak_suitable_version_capabilities func @cmp_exchange_weak_suitable_version_capabilities(%ptr: !spv.ptr, %value: i32, %comparator: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.AtomicCompareExchangeWeak "Workgroup" "AcquireRelease|AtomicCounterMemory" "Acquire" %0 = "test.convert_to_atomic_compare_exchange_weak_op"(%ptr, %value, %comparator): (!spv.ptr, i32, i32) -> (i32) @@ -44,7 +44,7 @@ func @cmp_exchange_weak_suitable_version_capabilities(%ptr: !spv.ptr, %value: i32, %comparator: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = 
dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_atomic_compare_exchange_weak_op %0 = "test.convert_to_atomic_compare_exchange_weak_op"(%ptr, %value, %comparator): (!spv.ptr, i32, i32) -> (i32) @@ -57,7 +57,7 @@ func @cmp_exchange_weak_unsupported_version(%ptr: !spv.ptr, %val // CHECK-LABEL: @group_non_uniform_ballot_suitable_version func @group_non_uniform_ballot_suitable_version(%predicate: i1) -> vector<4xi32> attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.GroupNonUniformBallot "Workgroup" %0 = "test.convert_to_group_non_uniform_ballot_op"(%predicate): (i1) -> (vector<4xi32>) @@ -66,7 +66,7 @@ func @group_non_uniform_ballot_suitable_version(%predicate: i1) -> vector<4xi32> // CHECK-LABEL: @group_non_uniform_ballot_unsupported_version func @group_non_uniform_ballot_unsupported_version(%predicate: i1) -> vector<4xi32> attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_group_non_uniform_ballot_op %0 = "test.convert_to_group_non_uniform_ballot_op"(%predicate): (i1) -> (vector<4xi32>) @@ -79,7 +79,7 @@ func @group_non_uniform_ballot_unsupported_version(%predicate: i1) -> vector<4xi // CHECK-LABEL: @cmp_exchange_weak_missing_capability_kernel func @cmp_exchange_weak_missing_capability_kernel(%ptr: !spv.ptr, %value: i32, %comparator: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: 
test.convert_to_atomic_compare_exchange_weak_op %0 = "test.convert_to_atomic_compare_exchange_weak_op"(%ptr, %value, %comparator): (!spv.ptr, i32, i32) -> (i32) @@ -88,7 +88,7 @@ func @cmp_exchange_weak_missing_capability_kernel(%ptr: !spv.ptr // CHECK-LABEL: @cmp_exchange_weak_missing_capability_atomic_storage func @cmp_exchange_weak_missing_capability_atomic_storage(%ptr: !spv.ptr, %value: i32, %comparator: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_atomic_compare_exchange_weak_op %0 = "test.convert_to_atomic_compare_exchange_weak_op"(%ptr, %value, %comparator): (!spv.ptr, i32, i32) -> (i32) @@ -97,7 +97,7 @@ func @cmp_exchange_weak_missing_capability_atomic_storage(%ptr: !spv.ptr vector<4xi32> attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_subgroup_ballot_op %0 = "test.convert_to_subgroup_ballot_op"(%predicate): (i1) -> (vector<4xi32>) @@ -106,7 +106,7 @@ func @subgroup_ballot_missing_capability(%predicate: i1) -> vector<4xi32> attrib // CHECK-LABEL: @bit_reverse_directly_implied_capability func @bit_reverse_directly_implied_capability(%operand: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.BitReverse %0 = "test.convert_to_bit_reverse_op"(%operand): (i32) -> (i32) @@ -115,7 +115,7 @@ func @bit_reverse_directly_implied_capability(%operand: i32) -> i32 attributes { // CHECK-LABEL: @bit_reverse_recursively_implied_capability func 
@bit_reverse_recursively_implied_capability(%operand: i32) -> i32 attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.BitReverse %0 = "test.convert_to_bit_reverse_op"(%operand): (i32) -> (i32) @@ -128,7 +128,7 @@ func @bit_reverse_recursively_implied_capability(%operand: i32) -> i32 attribute // CHECK-LABEL: @subgroup_ballot_suitable_extension func @subgroup_ballot_suitable_extension(%predicate: i1) -> vector<4xi32> attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.SubgroupBallotKHR %0 = "test.convert_to_subgroup_ballot_op"(%predicate): (i1) -> (vector<4xi32>) @@ -137,7 +137,7 @@ func @subgroup_ballot_suitable_extension(%predicate: i1) -> vector<4xi32> attrib // CHECK-LABEL: @subgroup_ballot_missing_extension func @subgroup_ballot_missing_extension(%predicate: i1) -> vector<4xi32> attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_subgroup_ballot_op %0 = "test.convert_to_subgroup_ballot_op"(%predicate): (i1) -> (vector<4xi32>) @@ -146,7 +146,7 @@ func @subgroup_ballot_missing_extension(%predicate: i1) -> vector<4xi32> attribu // CHECK-LABEL: @module_suitable_extension1 func @module_suitable_extension1() attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.module PhysicalStorageBuffer64 Vulkan 
"test.convert_to_module_op"() : () ->() @@ -155,7 +155,7 @@ func @module_suitable_extension1() attributes { // CHECK-LABEL: @module_suitable_extension2 func @module_suitable_extension2() attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.module PhysicalStorageBuffer64 Vulkan "test.convert_to_module_op"() : () -> () @@ -164,7 +164,7 @@ func @module_suitable_extension2() attributes { // CHECK-LABEL: @module_missing_extension_mm func @module_missing_extension_mm() attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_module_op "test.convert_to_module_op"() : () -> () @@ -173,7 +173,7 @@ func @module_missing_extension_mm() attributes { // CHECK-LABEL: @module_missing_extension_am func @module_missing_extension_am() attributes { - spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: test.convert_to_module_op "test.convert_to_module_op"() : () -> () @@ -183,7 +183,7 @@ func @module_missing_extension_am() attributes { // CHECK-LABEL: @module_implied_extension func @module_implied_extension() attributes { // Version 1.5 implies SPV_KHR_vulkan_memory_model and SPV_KHR_physical_storage_buffer. 
- spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + spv.target_env = #spv.target_env<#spv.vce, {}> } { // CHECK: spv.module PhysicalStorageBuffer64 Vulkan "test.convert_to_module_op"() : () -> () diff --git a/mlir/test/mlir-vulkan-runner/addf.mlir b/mlir/test/mlir-vulkan-runner/addf.mlir index 73622e37ade57..6cb7cdec3442e 100644 --- a/mlir/test/mlir-vulkan-runner/addf.mlir +++ b/mlir/test/mlir-vulkan-runner/addf.mlir @@ -4,9 +4,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_add(%arg0 : memref<8xf32>, %arg1 : memref<8xf32>, %arg2 : memref<8xf32>) diff --git a/mlir/test/mlir-vulkan-runner/addi.mlir b/mlir/test/mlir-vulkan-runner/addi.mlir index c690120718b2e..696c5015565db 100644 --- a/mlir/test/mlir-vulkan-runner/addi.mlir +++ b/mlir/test/mlir-vulkan-runner/addi.mlir @@ -4,9 +4,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_addi(%arg0 : memref<8xi32>, %arg1 : memref<8x8xi32>, %arg2 : memref<8x8x8xi32>) diff --git a/mlir/test/mlir-vulkan-runner/addi8.mlir b/mlir/test/mlir-vulkan-runner/addi8.mlir index 094186d5731d3..eeb5222856961 100644 --- a/mlir/test/mlir-vulkan-runner/addi8.mlir +++ b/mlir/test/mlir-vulkan-runner/addi8.mlir @@ -4,9 +4,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func 
@kernel_addi(%arg0 : memref<8xi8>, %arg1 : memref<8x8xi8>, %arg2 : memref<8x8x8xi32>) diff --git a/mlir/test/mlir-vulkan-runner/mulf.mlir b/mlir/test/mlir-vulkan-runner/mulf.mlir index be0bd5afb4252..0abcb53ebfe6b 100644 --- a/mlir/test/mlir-vulkan-runner/mulf.mlir +++ b/mlir/test/mlir-vulkan-runner/mulf.mlir @@ -4,9 +4,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_mul(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<4x4xf32>) diff --git a/mlir/test/mlir-vulkan-runner/subf.mlir b/mlir/test/mlir-vulkan-runner/subf.mlir index 5fc7e0a91d29b..77c1f8841e8be 100644 --- a/mlir/test/mlir-vulkan-runner/subf.mlir +++ b/mlir/test/mlir-vulkan-runner/subf.mlir @@ -4,9 +4,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_sub(%arg0 : memref<8x4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<8x4x4xf32>) diff --git a/mlir/test/mlir-vulkan-runner/time.mlir b/mlir/test/mlir-vulkan-runner/time.mlir index 9a96d7f819fde..21b4b76d1df08 100644 --- a/mlir/test/mlir-vulkan-runner/time.mlir +++ b/mlir/test/mlir-vulkan-runner/time.mlir @@ -7,9 +7,7 @@ module attributes { gpu.container_module, spv.target_env = #spv.target_env< - #spv.vce, - {max_compute_workgroup_invocations = 128 : i32, - max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}> + #spv.vce, {}> } { gpu.module @kernels { gpu.func @kernel_add(%arg0 : memref<16384xf32>, %arg1 : memref<16384xf32>, %arg2 : memref<16384xf32>) From 1673a080443327829fc55bb1299904e8831ca636 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Sep 2020 18:24:06 +0100 Subject: 
[PATCH 138/465] SelectionDAG.h - remove unnecessary FunctionLoweringInfo.h include. NFCI. Use forward declarations and move the include down to dependent files that actually use it. This also exposes a number of implicit dependencies on KnownBits.h --- llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h | 4 ++-- llvm/include/llvm/CodeGen/SelectionDAG.h | 2 +- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 + llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 3 ++- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 1 + llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 1 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 ++- llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp | 3 ++- 8 files changed, 12 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 033d5b4b58348..38eb0e4bebe74 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -20,9 +20,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" -#include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Allocator.h" @@ -37,7 +38,6 @@ class CallInst; class CallLowering; class Constant; class DataLayout; -class FunctionLoweringInfo; class Instruction; class MachineBasicBlock; class MachineFunction; diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index c285a68da9b03..5607e785e349a 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -27,7 +27,6 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/DAGCombine.h" 
-#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -64,6 +63,7 @@ class ConstantFP; class ConstantInt; class DataLayout; struct fltSemantics; +class FunctionLoweringInfo; class GlobalValue; struct KnownBits; class LegacyDivergenceAnalysis; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 82850f15feeef..ff6c642e3d4f3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 078c9691f8dc4..12745747f5f80 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -11,8 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index ee6d35ddddf8a..07b4992bbf571 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -44,6 +44,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/IntegerDivision.h" #include 
#include diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index f882bfb568ee2..fb954e6bbba1c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c04f569cebdee..ad9c4d0673476 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -34,8 +34,9 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/DAGCombine.h" -#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp index f7623da2c055e..584ef65b20bce 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -6,11 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/AsmParser/Parser.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" 
#include "llvm/Support/TargetSelect.h" From 5fe33f7399d86d5a36a8437db0661b1509200815 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Thu, 3 Sep 2020 11:01:49 -0700 Subject: [PATCH 139/465] [DebugInfo] Make DWARF ignore sizes on forward declared class types. Make sure the sizes for forward declared classes aren't emitted in DWARF. This comes before https://reviews.llvm.org/D87062, which adds sizes to all classes with definitions. Bug: https://bugs.llvm.org/show_bug.cgi?id=47338 Differential Revision: https://reviews.llvm.org/D87070 --- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 4 +++- llvm/test/DebugInfo/X86/struct-fwd-decl.ll | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 llvm/test/DebugInfo/X86/struct-fwd-decl.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index ceed1fe6e3bd5..40c741077d1ad 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1013,8 +1013,10 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) + // Ignore the size if it's a non-enum forward decl. // TODO: Do we care about size for enum forward declarations? - if (Size) + if (Size && + (!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type)) addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); else if (!CTy->isForwardDecl()) // Add zero size if it is not a forward declaration. 
diff --git a/llvm/test/DebugInfo/X86/struct-fwd-decl.ll b/llvm/test/DebugInfo/X86/struct-fwd-decl.ll new file mode 100644 index 0000000000000..adee78d1ced11 --- /dev/null +++ b/llvm/test/DebugInfo/X86/struct-fwd-decl.ll @@ -0,0 +1,21 @@ +; RUN: llc -O0 -mtriple=x86_64-unknown-linux %s -o %t -filetype=obj +; RUN: llvm-dwarfdump -debug-info %t | FileCheck %s +; Test that size is not emitted for class declarations in DWARF, even if it exists. + +@s = global i16 0, align 2, !dbg !0 + +!llvm.dbg.cu = !{!4} +!llvm.module.flags = !{!7} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = !DIGlobalVariable(name: "s", scope: null, file: !2, line: 2, type: !3, isLocal: false, isDefinition: true) +!2 = !DIFile(filename: "foo.cpp", directory: "/tmp") +!3 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !2, line: 1, size: 16, align: 16, flags: DIFlagFwdDecl) +!4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !2, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !5, retainedTypes: !5, globals: !6, imports: !5) +!5 = !{} +; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name +; CHECK-NOT: DW_AT_byte_size +; CHECK: {{NULL|DW_TAG}} +!6 = !{!0} +!7 = !{i32 1, !"Debug Info Version", i32 3} From b2e65cf9501d791429e80cba8a9b8ed3d3f193f8 Mon Sep 17 00:00:00 2001 From: Jamie Schmeiser Date: Thu, 3 Sep 2020 18:24:00 +0000 Subject: [PATCH 140/465] Revert "Add new hidden option -print-changed which only reports changes to IR" This reverts commit 7bc9924cb2fbd9f3ae53577607822ace267a04e6 due to failure caused by missing a space between trailing >>, required by some versions of C++:wq. 
--- .../llvm/Passes/StandardInstrumentations.h | 94 -------- llvm/lib/IR/LegacyPassManager.cpp | 4 +- llvm/lib/Passes/StandardInstrumentations.cpp | 219 ------------------ llvm/test/Other/change-printer.ll | 109 --------- 4 files changed, 2 insertions(+), 424 deletions(-) delete mode 100644 llvm/test/Other/change-printer.ll diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 5a93c51ad18bc..795e2770bbe18 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -25,7 +25,6 @@ namespace llvm { -class Function; class Module; /// Instrumentation to print IR before/after passes. @@ -74,98 +73,6 @@ class PrintPassInstrumentation { bool DebugLogging; }; -// Base class for classes that report changes to the IR. -// It presents an interface for such classes and provides callbacks -// on various events as the new pass manager transforms the IR. -// It also provides filtering of information based on hidden options -// specifying which functions are interesting. -// Callbacks are made for the following events/queries: -// 1. The initial IR processed. -// 2. To get the representation of the IR (of type \p T). -// 3. When a pass does not change the IR. -// 4. When a pass changes the IR (given both before and after representations -// of type \p T). -// 5. When an IR is invalidated. -// 6. When a pass is run on an IR that is not interesting (based on options). -// 7. When a pass is ignored (pass manager or adapter pass). -// 8. To compare two IR representations (of type \p T). 
-template class ChangePrinter { -protected: - ChangePrinter( - std::function HandleInitialIRFunc, - std::function - GenerateIRRepresentationFunc, - std::function OmitAfterFunc, - std::function - HandleAfterFunc, - std::function HandleInvalidatedFunc, - std::function - HandleFilteredFunc, - std::function - HandleIgnoredFunc, - std::function SameFunc) - : HandleInitialIR(HandleInitialIRFunc), - GenerateIRRepresentation(GenerateIRRepresentationFunc), - OmitAfter(OmitAfterFunc), HandleAfter(HandleAfterFunc), - HandleInvalidated(HandleInvalidatedFunc), - HandleFiltered(HandleFilteredFunc), HandleIgnored(HandleIgnoredFunc), - Same(SameFunc), InitialIR(true) {} - -public: - // Not virtual as classes are expected to be referenced as derived classes. - ~ChangePrinter() { - assert(BeforeStack.empty() && "Problem with Change Printer stack."); - } - - // Determine if this pass/IR is interesting and if so, save the IR - // otherwise it is left on the stack without data - void saveIRBeforePass(Any IR, StringRef PassID); - // Compare the IR from before the pass after the pass. - void handleIRAfterPass(Any IR, StringRef PassID); - // Handle the situation where a pass is invalidated. 
- void handleInvalidatedPass(StringRef PassID); - -private: - // callback on the first IR processed - std::function HandleInitialIR; - // callback before and after a pass to get the representation of the IR - std::function - GenerateIRRepresentation; - // callback when the pass is not iteresting - std::function OmitAfter; - // callback when interesting IR has changed - std::function - HandleAfter; - // callback when an interesting pass is invalidated - std::function HandleInvalidated; - // callback when the IR or pass is not interesting - std::function HandleFiltered; - // callback when an ignored pass is encountered - std::function HandleIgnored; - // callback to compare the before and after representations of the IR - std::function Same; - - // stack of IRs before passes - std::vector BeforeStack; - // Is this the first IR seen? - bool InitialIR; -}; - -// A change printer based on the string representation of the IR as created -// by unwrapAndPrint. The string representation is stored in a std::string -// to preserve it as the IR changes in each pass. Note that the banner is -// included in this representation but it is massaged before reporting. -class IRChangePrinter : public ChangePrinter { -public: - IRChangePrinter(); - void registerCallbacks(PassInstrumentationCallbacks &PIC); - -protected: - raw_ostream &Out; -}; - /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). 
class StandardInstrumentations { @@ -173,7 +80,6 @@ class StandardInstrumentations { PrintPassInstrumentation PrintPass; TimePassesHandler TimePasses; OptNoneInstrumentation OptNone; - IRChangePrinter PrintChangedIR; public: StandardInstrumentations(bool DebugLogging) : PrintPass(DebugLogging) {} diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 63886f4861708..8d9ed917bb617 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -87,14 +87,14 @@ static cl::opt PrintAfterAll("print-after-all", static cl::opt PrintModuleScope("print-module-scope", cl::desc("When printing IR for print-[before|after]{-all} " - "and change reporters always print a module IR"), + "always print a module IR"), cl::init(false), cl::Hidden); static cl::list PrintFuncsList("filter-print-funcs", cl::value_desc("function names"), cl::desc("Only print IR for functions whose name " "match this for all print-[before|after][-all] " - "and change reporter options"), + "options"), cl::CommaSeparated, cl::Hidden); /// This is a helper to determine whether to print IR before or diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index d36bfd87f8a62..da58fa57bdae7 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -26,7 +26,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" -#include #include using namespace llvm; @@ -44,34 +43,6 @@ static cl::opt cl::desc("Print all pass management debugging information. " "`-debug-pass-manager` must also be specified")); -// A hidden option that prints out the IR after passes, similar to -// -print-after-all except that it only prints the IR after passes that -// change the IR. Those passes that do not make changes to the IR are -// reported as not making any changes. In addition, the initial IR is -// also reported. 
Other hidden options affect the output from this -// option. -filter-passes will limit the output to the named passes -// that actually change the IR and other passes are reported as filtered out. -// The specified passes will either be reported as making no changes (with -// no IR reported) or the changed IR will be reported. Also, the -// -filter-print-funcs and -print-module-scope options will do similar -// filtering based on function name, reporting changed IRs as functions(or -// modules if -print-module-scope is specified) for a particular function -// or indicating that the IR has been filtered out. The extra options -// can be combined, allowing only changed IRs for certain passes on certain -// functions to be reported in different formats, with the rest being -// reported as filtered out. -static cl::opt PrintChanged("print-changed", - cl::desc("Print changed IRs"), - cl::init(false), cl::Hidden); -// A hidden option that supports the -print-changed option. See -// the description for -print-changed for an explanation of the use -// of this option. Note that this option has no effect without -print-changed. -static cl::list - PrintPassesList("filter-passes", cl::value_desc("pass names"), - cl::desc("Only consider IR changes for passes whose names " - "match for the print-changed option"), - cl::CommaSeparated, cl::Hidden); - namespace { /// Extracting Module out of \p IR unit. Also fills a textual description @@ -218,197 +189,8 @@ void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner, llvm_unreachable("Unknown wrapped IR type"); } -// Return true when this is a pass for which printing of changes is desired. -inline bool isIgnored(StringRef PassID) { - return PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"); -} - -// Return true when this is a defined function for which printing -// of changes is desired. 
-inline bool isInterestingFunction(const Function &F) { - return llvm::isFunctionInPrintList(F.getName()); -} - -// Return true when this is a pass for which printing of changes is desired. -inline bool isInterestingPass(StringRef PassID) { - if (isIgnored(PassID)) - return false; - - static std::unordered_set PrintPassNames(PrintPassesList.begin(), - PrintPassesList.end()); - return PrintPassNames.empty() || PrintPassNames.count(PassID.str()); -} - -// Return true when this is a pass on IR for which printing -// of changes is desired. -bool isInteresting(Any IR, StringRef PassID) { - if (!isInterestingPass(PassID)) - return false; - if (any_isa(IR)) - return isInterestingFunction(*any_cast(IR)); - return true; -} - } // namespace -template -void ChangePrinter::saveIRBeforePass(Any IR, StringRef PassID) { - // Always need to place something on the stack because invalidated passes - // are not given the IR so it cannot be determined whether the pass was for - // something that was filtered out. - BeforeStack.emplace_back(); - - if (!isInteresting(IR, PassID)) - return; - // Is this the initial IR? - if (InitialIR) { - InitialIR = false; - HandleInitialIR(IR); - } - - // Save the IR representation on the stack. - auto &Data = BeforeStack.back(); - GenerateIRRepresentation(IR, PassID, Data); -} - -template -void ChangePrinter::handleIRAfterPass(Any IR, StringRef PassID) { - assert(!BeforeStack.empty() && "Unexpected empty stack encountered."); - std::string Name; - - // unwrapModule has inconsistent handling of names for function IRs. 
- if (any_isa(IR)) { - const Function *F = any_cast(IR); - Name = formatv(" (function: {0})", F->getName()).str(); - } else { - if (auto UM = unwrapModule(IR)) - Name = UM->second; - } - if (Name == "") - Name = " (module)"; - - if (isIgnored(PassID)) - HandleIgnored(PassID, Name); - else if (!isInteresting(IR, PassID)) - HandleFiltered(PassID, Name); - else { - // Get the before rep from the stack - T &Before = BeforeStack.back(); - // Create the after rep - T After; - GenerateIRRepresentation(IR, PassID, After); - - // was there a change in IR? - if (Same(Before, After)) - OmitAfter(PassID, Name); - else - HandleAfter(PassID, Name, Before, After, IR); - } - BeforeStack.pop_back(); -} - -template -void ChangePrinter::handleInvalidatedPass(StringRef PassID) { - assert(!BeforeStack.empty() && "Unexpected empty stack encountered."); - - // Always flag it as invalidated as we cannot determine when - // a pass for a filtered function is invalidated since we do not - // get the IR in the callback. Also, the output is just alternate - // forms of the banner anyway. 
- HandleInvalidated(PassID); - BeforeStack.pop_back(); -} - -void handleInitialIR(Any IR, raw_ostream &Out) { - StringRef Banner("*** IR Dump At Start: ***"); - unwrapAndPrint(Out, IR, Banner, true); -} - -void generateOutput(Any IR, StringRef PassID, std::string &Output) { - raw_string_ostream OS(Output); - // use the after banner for all cases so it will match - SmallString<20> Banner = formatv("*** IR Dump After {0} ***", PassID); - unwrapAndPrint(OS, IR, Banner, llvm::forcePrintModuleIR()); - OS.str(); -} - -void omitAfter(StringRef PassID, std::string &Name, raw_ostream &Out) { - Out << formatv("*** IR Dump After {0}{1} omitted because no change ***\n", - PassID, Name); -} - -void handleAfter(const StringRef After, std::string &Name, raw_ostream &Out) { - assert(After.find("*** IR Dump") == 0 && "Unexpected banner format."); - StringRef Banner = After.take_until([](char C) -> bool { return C == '\n'; }); - Out << Banner; - - // LazyCallGraph::SCC already has "(scc:..." in banner so only add - // in the name if it isn't already there. 
- if (Name.substr(0, 6).compare(" (scc:") != 0 && !llvm::forcePrintModuleIR()) - Out << Name; - - Out << After.substr(Banner.size()); -} - -void handleInvalidated(StringRef PassID, raw_ostream &Out) { - Out << formatv("*** IR Pass {0} invalidated ***\n", PassID); -} - -void handleFiltered(StringRef PassID, std::string &Name, raw_ostream &Out) { - SmallString<20> Banner = - formatv("*** IR Dump After {0}{1} filtered out ***\n", PassID, Name); - Out << Banner; -} - -void handleIgnored(StringRef PassID, std::string &Name, raw_ostream &Out) { - Out << formatv("*** IR Pass {0}{1} ignored ***\n", PassID, Name); -} - -bool sameIR(const std::string &S1, const std::string &S2) { - return S1.compare(S2) == 0; -} - -IRChangePrinter::IRChangePrinter() - : ChangePrinter( - [this](Any IR) -> void { ::handleInitialIR(IR, Out); }, - ::generateOutput, - [this](StringRef PassID, std::string &Name) -> void { - ::omitAfter(PassID, Name, Out); - }, - [this](StringRef PassID, std::string &Name, const std::string &Before, - const std::string &After, - Any IR) -> void { ::handleAfter(After, Name, Out); }, - [this](StringRef PassID) -> void { - ::handleInvalidated(PassID, Out); - }, - [this](StringRef PassID, std::string &Name) -> void { - ::handleFiltered(PassID, Name, Out); - }, - [this](StringRef PassID, std::string &Name) -> void { - ::handleIgnored(PassID, Name, Out); - }, - ::sameIR), - Out(dbgs()) {} - -void IRChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) { - if (!PrintChanged) - return; - - PIC.registerBeforePassCallback([this](StringRef P, Any IR) { - saveIRBeforePass(IR, P); - return true; - }); - - PIC.registerAfterPassCallback( - [this](StringRef P, Any IR, const PreservedAnalyses &) { - handleIRAfterPass(IR, P); - }); - PIC.registerAfterPassInvalidatedCallback( - [this](StringRef P, const PreservedAnalyses &) { - handleInvalidatedPass(P); - }); -} - PrintIRInstrumentation::~PrintIRInstrumentation() { assert(ModuleDescStack.empty() && "ModuleDescStack is 
not empty at exit"); } @@ -562,5 +344,4 @@ void StandardInstrumentations::registerCallbacks( PrintPass.registerCallbacks(PIC); TimePasses.registerCallbacks(PIC); OptNone.registerCallbacks(PIC); - PrintChangedIR.registerCallbacks(PIC); } diff --git a/llvm/test/Other/change-printer.ll b/llvm/test/Other/change-printer.ll deleted file mode 100644 index 51354fcc0341e..0000000000000 --- a/llvm/test/Other/change-printer.ll +++ /dev/null @@ -1,109 +0,0 @@ -; Simple checks of -print-changed functionality -; -; Note that (mostly) only the banners are checked. -; -; Simple functionality check. -; RUN: opt -S -print-changed -passes=instsimplify 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK0 -; -; Check that only the passes that change the IR are printed and that the -; others (including g) are filtered out. -; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK1 -; -; Check that the reporting of IRs respects -print-module-scope -; RUN: opt -S -print-changed -passes=instsimplify -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK2 -; -; Check that the reporting of IRs respects -print-module-scope -; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs=f -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK3 -; -; Check that reporting of multiple functions happens -; RUN: opt -S -print-changed -passes=instsimplify -filter-print-funcs="f,g" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK4 -; -; Check that the reporting of IRs respects -filter-passes -; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass" 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK5 -; -; Check that the reporting of IRs respects -filter-passes with multiple passes -; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" 2>&1 -o 
/dev/null < %s | FileCheck %s --check-prefix=CHECK6 -; -; Check that the reporting of IRs respects both -filter-passes and -filter-print-funcs -; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK7 -; -; Check that the reporting of IRs respects -filter-passes, -filter-print-funcs and -print-module-scope -; RUN: opt -S -print-changed -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f -print-module-scope 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK8 -; -; Check that repeated passes that change the IR are printed and that the -; others (including g) are filtered out. Note that the second time -; instsimplify is run on f, it does not change the IR -; RUN: opt -S -print-changed -passes="instsimplify,instsimplify" -filter-print-funcs=f 2>&1 -o /dev/null < %s | FileCheck %s --check-prefix=CHECK9 - -define i32 @g() { -entry: - %a = add i32 2, 3 - ret i32 %a -} - -define i32 @f() { -entry: - %a = add i32 2, 3 - ret i32 %a -} - -; CHECK0: *** IR Dump At Start: *** -; CHECK0: ; ModuleID = '' -; CHECK0: *** IR Dump After VerifierPass (module) omitted because no change *** -; CHECK0: *** IR Dump After InstSimplifyPass *** (function: g) -; CHECK0: *** IR Pass PassManager (function: g) ignored *** -; CHECK0: *** IR Dump After InstSimplifyPass *** (function: f) -; CHECK0: *** IR Pass PassManager (function: f) ignored *** -; CHECK0: *** IR Pass ModuleToFunctionPassAdaptor > (module) ignored *** -; CHECK0: *** IR Dump After VerifierPass (module) omitted because no change *** -; CHECK0: *** IR Dump After PrintModulePass (module) omitted because no change *** - -; CHECK1: *** IR Dump At Start: *** -; CHECK1: *** IR Dump After InstSimplifyPass (function: g) filtered out *** -; CHECK1: *** IR Dump After InstSimplifyPass *** (function: f) - -; CHECK2: *** IR Dump At 
Start: *** -; CHECK2: *** IR Dump After InstSimplifyPass *** (function: g) -; CHECK2: ModuleID = '' -; CHECK2: *** IR Dump After InstSimplifyPass *** (function: f) -; CHECK2: ModuleID = '' - -; CHECK3: *** IR Dump At Start: *** -; CHECK3: *** IR Dump After InstSimplifyPass (function: g) filtered out *** -; CHECK3: *** IR Dump After InstSimplifyPass *** (function: f) -; CHECK3: ModuleID = '' - -; CHECK4: *** IR Dump At Start: *** -; CHECK4: *** IR Dump After InstSimplifyPass *** (function: g) -; CHECK4: *** IR Dump After InstSimplifyPass *** (function: f) - -; CHECK5: *** IR Dump After InstSimplifyPass (function: g) filtered out *** -; CHECK5: *** IR Dump At Start: *** (function: g) -; CHECK5: *** IR Dump After NoOpFunctionPass (function: g) omitted because no change *** -; CHECK5: *** IR Dump After InstSimplifyPass (function: f) filtered out *** -; CHECK5: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** - -; CHECK6: *** IR Dump At Start: *** (function: g) -; CHECK6: *** IR Dump After InstSimplifyPass *** (function: g) -; CHECK6: *** IR Dump After NoOpFunctionPass (function: g) omitted because no change *** -; CHECK6: *** IR Dump After InstSimplifyPass *** (function: f) -; CHECK6: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** - -; CHECK7: *** IR Dump After InstSimplifyPass (function: g) filtered out *** -; CHECK7: *** IR Dump After NoOpFunctionPass (function: g) filtered out *** -; CHECK7: *** IR Dump At Start: *** (function: f) -; CHECK7: *** IR Dump After InstSimplifyPass *** (function: f) -; CHECK7: *** IR Dump After NoOpFunctionPass (function: f) omitted because no change *** - -; CHECK8: *** IR Dump After InstSimplifyPass (function: g) filtered out *** -; CHECK8: *** IR Dump After NoOpFunctionPass (function: g) filtered out *** -; CHECK8: *** IR Dump At Start: *** (function: f) -; CHECK8: *** IR Dump After InstSimplifyPass *** (function: f) -; CHECK8: ModuleID = '' -; CHECK8: *** IR Dump After 
NoOpFunctionPass (function: f) omitted because no change *** - -; CHECK9: *** IR Dump At Start: *** -; CHECK9: *** IR Dump After InstSimplifyPass (function: g) filtered out *** -; CHECK9: *** IR Dump After InstSimplifyPass (function: g) filtered out *** -; CHECK9: *** IR Dump After InstSimplifyPass *** (function: f) -; CHECK9: *** IR Dump After InstSimplifyPass (function: f) omitted because no change *** From d1be928d23fe6b6770be007c7fd0753ca4d17516 Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Thu, 3 Sep 2020 11:52:14 -0700 Subject: [PATCH 141/465] SVML support for log2 Although LLVM supports vectorization of loops containing log2, it did not support using SVML implementation of it. Added support so that when clang is invoked with -fveclib=SVML now an appropriate SVML library log2 implementation will be invoked. Follow up on: https://reviews.llvm.org/D77114 Tests: Added unit tests to svml-calls.ll, svml-calls-finite.ll. Can be run with llvm-lint. Created a simple c++ file that tests log2, and used clang+ to build it, and output final assembly. 
Reviewed By: wenlei, craig.topper Differential Revision: https://reviews.llvm.org/D86730 --- llvm/include/llvm/Analysis/VecFuncs.def | 24 +++++ .../LoopVectorize/X86/svml-calls-finite.ll | 57 +++++++++++ .../LoopVectorize/X86/svml-calls.ll | 97 +++++++++++++++++++ 3 files changed, 178 insertions(+) diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 2f64b0fedc7aa..9fdbf638078f4 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -245,6 +245,30 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4) TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8) TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16) +TLI_DEFINE_VECFUNC("log2", "__svml_log22", 2) +TLI_DEFINE_VECFUNC("log2", "__svml_log24", 4) +TLI_DEFINE_VECFUNC("log2", "__svml_log28", 8) + +TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", 4) +TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", 8) +TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", 16) + +TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", 2) +TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", 4) +TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", 8) + +TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", 4) +TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", 8) +TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", 16) + +TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", 2) +TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", 4) +TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", 8) + +TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4) +TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8) +TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16) + TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2) TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4) TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8) diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll index 
d6e3469c7bdb7..dd6692d75e5f5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll @@ -243,3 +243,60 @@ for.end: !71 = distinct !{!71, !72, !73} !72 = !{!"llvm.loop.vectorize.width", i32 4} !73 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @__log2f_finite(float) #0 + +; CHECK-LABEL: @log2_f32 +; CHECK: <4 x float> @__svml_log2f4 +; CHECK: ret +define void @log2_f32(float* nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__log2f_finite(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + +for.end: ; preds = %for.body + ret void +} + +!81 = distinct !{!21, !22, !23} +!82 = !{!"llvm.loop.vectorize.width", i32 4} +!83 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__log2_finite(double) #0 + +; CHECK-LABEL: @log2_f64 +; CHECK: <4 x double> @__svml_log24 +; CHECK: ret +define void @log2_f64(double* nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double @__log2_finite(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %call, double* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: ; 
preds = %for.body + ret void +} + +!91 = distinct !{!31, !32, !33} +!92 = !{!"llvm.loop.vectorize.width", i32 4} +!93 = !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll index aa8a25c3b87f5..c074830075521 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll @@ -28,6 +28,11 @@ declare float @logf(float) #0 declare double @llvm.log.f64(double) #0 declare float @llvm.log.f32(float) #0 +declare double @log2(double) #0 +declare float @log2f(float) #0 +declare double @llvm.log2.f64(double) #0 +declare float @llvm.log2.f32(float) #0 + declare double @exp2(double) #0 declare float @exp2f(float) #0 declare double @llvm.exp2.f64(double) #0 @@ -501,6 +506,98 @@ for.end: ret void } +define void @log2_f64(double* nocapture %varray) { +; CHECK-LABEL: @log2_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @log2(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log2_f32(float* nocapture %varray) { +; CHECK-LABEL: @log2_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @log2f(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + 
store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log2_f64_intrinsic(double* nocapture %varray) { +; CHECK-LABEL: @log2_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log2.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log2_f32_intrinsic(float* nocapture %varray) { +; CHECK-LABEL: @log2_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log2.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + define void @exp2_f64(double* nocapture %varray) { ; CHECK-LABEL: @exp2_f64( ; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) From f369d51896e1c0f61df253b116c42771479549df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Mon, 17 Aug 2020 22:54:45 +0200 Subject: [PATCH 142/465] [lldb] avoid assert in threadsanitizer tests on linux The tests are 
unsupported on linux, but they assert in Thread::GetStopDescriptionRaw() because of empty stop reason description. And it is empty because InstrumentationRuntimeTSan::NotifyBreakpointHit() fails to get report from InstrumentationRuntimeTSan::RetrieveReportData(), which is possibly(?) the reason why this is unsupported on linux. Add a dummy stop reason description for this case, which changes the test result from failing to unsupported. --- .../InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp index a2954f556b103..68e732538158a 100644 --- a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp +++ b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp @@ -855,6 +855,8 @@ bool InstrumentationRuntimeTSan::NotifyBreakpointHit( }); report->GetAsDictionary()->AddBooleanItem("all_addresses_are_same", all_addresses_are_same); + } else { + stop_reason_description = "unknown ThreadSanitizer stop reason"; } // Make sure this is the right process From 6e09722b27ed4d48dfc668b0efc2aed88d701ebf Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 3 Sep 2020 11:20:29 -0700 Subject: [PATCH 143/465] [test] Use %t instead of %T to remove race conditions between config-file3.c and target-override.c Both tests operate on `%T/testbin`. If the two tests run concurrently, one may fail. 
This is likely the root cause of flaky failures reported by https://lists.llvm.org/pipermail/llvm-dev/2020-September/144781.html https://llvm.org/docs/CommandGuide/lit.html says: `%T parent directory of %t (not unique, deprecated, do not use)` Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D87103 --- clang/test/Driver/config-file3.c | 62 ++++++++++++++--------------- clang/test/Driver/target-override.c | 9 ++--- 2 files changed, 34 insertions(+), 37 deletions(-) diff --git a/clang/test/Driver/config-file3.c b/clang/test/Driver/config-file3.c index 148646c2ebbf1..fc5c286553ad5 100644 --- a/clang/test/Driver/config-file3.c +++ b/clang/test/Driver/config-file3.c @@ -1,14 +1,15 @@ // REQUIRES: shell // REQUIRES: x86-registered-target +// RUN: rm -rf %t && mkdir %t + //--- If config file is specified by relative path (workdir/cfg-s2), it is searched for by that path. + +// RUN: mkdir -p %t/workdir/subdir +// RUN: echo "@subdir/cfg-s2" > %t/workdir/cfg-1 +// RUN: echo "-Wundefined-var-template" > %t/workdir/subdir/cfg-s2 // -// RUN: mkdir -p %T/workdir -// RUN: echo "@subdir/cfg-s2" > %T/workdir/cfg-1 -// RUN: mkdir -p %T/workdir/subdir -// RUN: echo "-Wundefined-var-template" > %T/workdir/subdir/cfg-s2 -// -// RUN: ( cd %T && %clang --config workdir/cfg-1 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-REL ) +// RUN: ( cd %t && %clang --config workdir/cfg-1 -c %s -### 2>&1 | FileCheck %s -check-prefix CHECK-REL ) // // CHECK-REL: Configuration file: {{.*}}/workdir/cfg-1 // CHECK-REL: -Wundefined-var-template @@ -16,12 +17,11 @@ //--- Invocation qqq-clang-g++ tries to find config file qqq-clang-g++.cfg first. 
// -// RUN: rm -rf %T/testdmode -// RUN: mkdir -p %T/testdmode -// RUN: ln -s %clang %T/testdmode/qqq-clang-g++ -// RUN: echo "-Wundefined-func-template" > %T/testdmode/qqq-clang-g++.cfg -// RUN: echo "-Werror" > %T/testdmode/qqq.cfg -// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix FULL-NAME +// RUN: mkdir %t/testdmode +// RUN: ln -s %clang %t/testdmode/qqq-clang-g++ +// RUN: echo "-Wundefined-func-template" > %t/testdmode/qqq-clang-g++.cfg +// RUN: echo "-Werror" > %t/testdmode/qqq.cfg +// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix FULL-NAME // // FULL-NAME: Configuration file: {{.*}}/testdmode/qqq-clang-g++.cfg // FULL-NAME: -Wundefined-func-template @@ -31,20 +31,20 @@ // (As the clang executable and symlink are in different directories, this // requires specifying the path via --config-*-dir= though.) // -// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir=%T/testdmode -c %s -### 2>&1 | FileCheck %s -check-prefix SYMLINK +// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir=%t/testdmode -c %s -### 2>&1 | FileCheck %s -check-prefix SYMLINK // // SYMLINK: Configuration file: {{.*}}/testdmode/qqq-clang-g++.cfg // //--- File specified by --config overrides config inferred from clang executable. 
// -// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config i386-qqq -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-EXPLICIT +// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config i386-qqq -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-EXPLICIT // // CHECK-EXPLICIT: Configuration file: {{.*}}/Inputs/config/i386-qqq.cfg // //--- Invocation qqq-clang-g++ tries to find config file qqq.cfg if qqq-clang-g++.cfg is not found. // -// RUN: rm %T/testdmode/qqq-clang-g++.cfg -// RUN: %T/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix SHORT-NAME +// RUN: rm %t/testdmode/qqq-clang-g++.cfg +// RUN: %t/testdmode/qqq-clang-g++ --config-system-dir= --config-user-dir= -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix SHORT-NAME // // SHORT-NAME: Configuration file: {{.*}}/testdmode/qqq.cfg // SHORT-NAME: -Werror @@ -53,11 +53,10 @@ //--- Config files are searched for in binary directory as well. // -// RUN: rm -rf %T/testbin -// RUN: mkdir -p %T/testbin -// RUN: ln -s %clang %T/testbin/clang -// RUN: echo "-Werror" > %T/testbin/aaa.cfg -// RUN: %T/testbin/clang --config-system-dir= --config-user-dir= --config aaa.cfg -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-BIN +// RUN: mkdir %t/testbin +// RUN: ln -s %clang %t/testbin/clang +// RUN: echo "-Werror" > %t/testbin/aaa.cfg +// RUN: %t/testbin/clang --config-system-dir= --config-user-dir= --config aaa.cfg -c -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-BIN // // CHECK-BIN: Configuration file: {{.*}}/testbin/aaa.cfg // CHECK-BIN: -Werror @@ -68,12 +67,11 @@ //--- When reloading config file, x86_64-clang-g++ tries to find config i386-clang-g++.cfg first. 
// -// RUN: rm -rf %T/testreload -// RUN: mkdir -p %T/testreload -// RUN: ln -s %clang %T/testreload/x86_64-clang-g++ -// RUN: echo "-Wundefined-func-template" > %T/testreload/i386-clang-g++.cfg -// RUN: echo "-Werror" > %T/testreload/i386.cfg -// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD +// RUN: mkdir %t/testreload +// RUN: ln -s %clang %t/testreload/x86_64-clang-g++ +// RUN: echo "-Wundefined-func-template" > %t/testreload/i386-clang-g++.cfg +// RUN: echo "-Werror" > %t/testreload/i386.cfg +// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD // // CHECK-RELOAD: Configuration file: {{.*}}/testreload/i386-clang-g++.cfg // CHECK-RELOAD: -Wundefined-func-template @@ -81,24 +79,24 @@ //--- If config file is specified by --config and its name does not start with architecture, it is used without reloading. 
// -// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1a +// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1a // // CHECK-RELOAD1a: Configuration file: {{.*}}/Inputs/config-3.cfg // -// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -target i386 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1b +// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs --config-user-dir= --config config-3 -c -target i386 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1b // // CHECK-RELOAD1b: Configuration file: {{.*}}/Inputs/config-3.cfg //--- If config file is specified by --config and its name starts with architecture, it is reloaded. // -// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1c +// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir=%S/Inputs/config --config-user-dir= --config x86_64-qqq -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1c // // CHECK-RELOAD1c: Configuration file: {{.*}}/Inputs/config/i386-qqq.cfg //--- x86_64-clang-g++ tries to find config i386.cfg if i386-clang-g++.cfg is not found. 
// -// RUN: rm %T/testreload/i386-clang-g++.cfg -// RUN: %T/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1d +// RUN: rm %t/testreload/i386-clang-g++.cfg +// RUN: %t/testreload/x86_64-clang-g++ --config-system-dir= --config-user-dir= -c -m32 -no-canonical-prefixes %s -### 2>&1 | FileCheck %s -check-prefix CHECK-RELOAD1d // // CHECK-RELOAD1d: Configuration file: {{.*}}/testreload/i386.cfg // CHECK-RELOAD1d: -Werror diff --git a/clang/test/Driver/target-override.c b/clang/test/Driver/target-override.c index b4dbd2da1df6f..ddda8aaad85a0 100644 --- a/clang/test/Driver/target-override.c +++ b/clang/test/Driver/target-override.c @@ -1,16 +1,15 @@ // REQUIRES: shell // REQUIRES: x86-registered-target -// RUN: rm -rf %T/testbin -// RUN: mkdir -p %T/testbin -// RUN: ln -s %clang %T/testbin/i386-clang +// RUN: rm -rf %t && mkdir %t +// RUN: ln -s %clang %t/i386-clang // Check if invocation of "foo-clang" adds option "-target foo". // -// RUN: %T/testbin/i386-clang -c -no-canonical-prefixes %s -### 2>&1 | FileCheck -check-prefix CHECK-TG1 %s +// RUN: %t/i386-clang -c -no-canonical-prefixes %s -### 2>&1 | FileCheck -check-prefix CHECK-TG1 %s // CHECK-TG1: Target: i386 // Check if invocation of "foo-clang -target bar" overrides option "-target foo". 
// -// RUN: %T/testbin/i386-clang -c -no-canonical-prefixes -target x86_64 %s -### 2>&1 | FileCheck -check-prefix CHECK-TG2 %s +// RUN: %t/i386-clang -c -no-canonical-prefixes -target x86_64 %s -### 2>&1 | FileCheck -check-prefix CHECK-TG2 %s // CHECK-TG2: Target: x86_64 From a09eef113f171809c642d790100799bc529011a9 Mon Sep 17 00:00:00 2001 From: Bryan Chan Date: Thu, 3 Sep 2020 15:29:37 -0400 Subject: [PATCH 144/465] Replace CRLF with LF; NFC --- .../InstSimplify/ConstProp/math-1.ll | 390 +++++++++--------- .../Transforms/InstSimplify/ConstProp/rint.ll | 218 +++++----- .../InstSimplify/ConstProp/round.ll | 184 ++++----- .../InstSimplify/ConstProp/trunc.ll | 210 +++++----- .../ConstProp/vscale-getelementptr.ll | 64 +-- 5 files changed, 533 insertions(+), 533 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll b/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll index b57397381b647..5950950171023 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll @@ -1,195 +1,195 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S -o - %s | FileCheck %s - -declare double @acos(double) -define double @f_acos() { -; CHECK-LABEL: @f_acos( -; CHECK-NEXT: ret double 0.000000e+00 -; - %res = tail call fast double @acos(double 1.0) - ret double %res -} - -declare float @asinf(float) -define float @f_asinf() { -; CHECK-LABEL: @f_asinf( -; CHECK-NEXT: ret float 0x3FF921FB{{.+}} -; - %res = tail call fast float @asinf(float 1.0) - ret float %res -} - -declare double @atan(double) -define double @f_atan() { -; CHECK-LABEL: @f_atan( -; CHECK-NEXT: [[RES:%.*]] = tail call fast double @atan(double 1.000000e+00) -; CHECK-NEXT: ret double 0x3FE921FB -; - %res = tail call fast double @atan(double 1.0) - ret double %res -} - -declare float @cosf(float) -define float @f_cosf() { -; CHECK-LABEL: @f_cosf( -; CHECK-NEXT: ret float 0x3FE14A2{{.+}} 
-; - %res = tail call fast float @cosf(float 1.0) - ret float %res -} - -declare float @llvm.cos.f32(float) -define float @i_cosf() { -; CHECK-LABEL: @i_cosf( -; CHECK-NEXT: ret float 0x3FE14A2 -; - %res = tail call fast float @llvm.cos.f32(float 1.0) - ret float %res -} - -declare double @cosh(double) -define double @f_cosh() { -; CHECK-LABEL: @f_cosh( -; CHECK-NEXT: ret double 0x3FF8B075{{.+}} -; - %res = tail call fast double @cosh(double 1.0) - ret double %res -} - -declare float @expf(float) -define float @f_expf() { -; CHECK-LABEL: @f_expf( -; CHECK-NEXT: ret float 0x4005BF0A{{.+}} -; - %res = tail call fast float @expf(float 1.0) - ret float %res -} - -declare float @llvm.exp.f32(float) -define float @i_expf() { -; CHECK-LABEL: @i_expf( -; CHECK-NEXT: ret float 0x4005BF0A{{.+}} -; - %res = tail call fast float @llvm.exp.f32(float 1.0) - ret float %res -} - -declare double @exp2(double) -define double @f_exp2() { -; CHECK-LABEL: @f_exp2( -; CHECK-NEXT: ret double 2.000000e+00 -; - %res = tail call fast double @exp2(double 1.0) - ret double %res -} - -declare double @llvm.exp2.f64(double) -define double @i_exp2() { -; CHECK-LABEL: @i_exp2( -; CHECK-NEXT: ret double 2.000000e+00 -; - %res = tail call fast double @llvm.exp2.f64(double 1.0) - ret double %res -} - -; FIXME: exp10() is not widely supported. 
-declare float @exp10f(float) -define float @f_exp10f() { -; CHECK-LABEL: @f_exp10f( -; CHECK-NEXT: [[RES:%.*]] = tail call float @exp10f(float 1.000000e+00) -; CHECK-NEXT: ret float [[RES]] -; - %res = tail call float @exp10f(float 1.0) - ret float %res -} - -declare double @log(double) -define double @f_log() { -; CHECK-LABEL: @f_log( -; CHECK-NEXT: ret double 0.000000e+00 -; - %res = tail call fast double @log(double 1.0) - ret double %res -} - -declare double @llvm.log.f64(double) -define double @i_log() { -; CHECK-LABEL: @i_log( -; CHECK-NEXT: ret double 0.000000e+00 -; - %res = tail call fast double @llvm.log.f64(double 1.0) - ret double %res -} - -declare float @log2f(float) -define float @f_log2f() { -; CHECK-LABEL: @f_log2f( -; CHECK-NEXT: ret float 0.000000e+00 -; - %res = tail call fast float @log2f(float 1.0) - ret float %res -} - -declare float @llvm.log2.f32(float) -define float @i_log2f() { -; CHECK-LABEL: @i_log2f( -; CHECK-NEXT: ret float 0.000000e+00 -; - %res = tail call fast float @llvm.log2.f32(float 1.0) - ret float %res -} - -declare double @log10(double) -define double @f_log10() { -; CHECK-LABEL: @f_log10( -; CHECK-NEXT: ret double 0.000000e+00 -; - %res = tail call fast double @log10(double 1.0) - ret double %res -} - -declare float @sinf(float) -define float @f_sinf() { -; CHECK-LABEL: @f_sinf( -; CHECK-NEXT: ret float 0x3FEAED54{{.+}} -; - %res = tail call fast float @sinf(float 1.0) - ret float %res -} - -declare double @sinh(double) -define double @f_sinh() { -; CHECK-LABEL: @f_sinh( -; CHECK-NEXT: ret double 0x3FF2CD9F{{.+}} -; - %res = tail call fast double @sinh(double 1.0) - ret double %res -} - -declare float @sqrtf(float) -define float @f_sqrtf() { -; CHECK-LABEL: @f_sqrtf( -; CHECK-NEXT: ret float 1.000000e+00 -; - %res = tail call fast float @sqrtf(float 1.0) - ret float %res -} - -declare double @tan(double) -define double @f_tan() { -; CHECK-LABEL: @f_tan( -; CHECK-NEXT: ret double 0x3FF8EB24{{.+}} -; - %res = tail call fast 
double @tan(double 1.0) - ret double %res -} - -declare float @tanhf(float) -define float @f_tanhf() { -; CHECK-LABEL: @f_tanhf( -; CHECK-NEXT: [[RES:%.*]] = tail call fast float @tanhf(float 1.000000e+00) -; CHECK-NEXT: ret float 0x3FE85EFA{{.+}} -; - %res = tail call fast float @tanhf(float 1.0) - ret float %res -} +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -early-cse -S -o - %s | FileCheck %s + +declare double @acos(double) +define double @f_acos() { +; CHECK-LABEL: @f_acos( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @acos(double 1.0) + ret double %res +} + +declare float @asinf(float) +define float @f_asinf() { +; CHECK-LABEL: @f_asinf( +; CHECK-NEXT: ret float 0x3FF921FB{{.+}} +; + %res = tail call fast float @asinf(float 1.0) + ret float %res +} + +declare double @atan(double) +define double @f_atan() { +; CHECK-LABEL: @f_atan( +; CHECK-NEXT: [[RES:%.*]] = tail call fast double @atan(double 1.000000e+00) +; CHECK-NEXT: ret double 0x3FE921FB +; + %res = tail call fast double @atan(double 1.0) + ret double %res +} + +declare float @cosf(float) +define float @f_cosf() { +; CHECK-LABEL: @f_cosf( +; CHECK-NEXT: ret float 0x3FE14A2{{.+}} +; + %res = tail call fast float @cosf(float 1.0) + ret float %res +} + +declare float @llvm.cos.f32(float) +define float @i_cosf() { +; CHECK-LABEL: @i_cosf( +; CHECK-NEXT: ret float 0x3FE14A2 +; + %res = tail call fast float @llvm.cos.f32(float 1.0) + ret float %res +} + +declare double @cosh(double) +define double @f_cosh() { +; CHECK-LABEL: @f_cosh( +; CHECK-NEXT: ret double 0x3FF8B075{{.+}} +; + %res = tail call fast double @cosh(double 1.0) + ret double %res +} + +declare float @expf(float) +define float @f_expf() { +; CHECK-LABEL: @f_expf( +; CHECK-NEXT: ret float 0x4005BF0A{{.+}} +; + %res = tail call fast float @expf(float 1.0) + ret float %res +} + +declare float @llvm.exp.f32(float) +define float @i_expf() { +; CHECK-LABEL: @i_expf( +; CHECK-NEXT: 
ret float 0x4005BF0A{{.+}} +; + %res = tail call fast float @llvm.exp.f32(float 1.0) + ret float %res +} + +declare double @exp2(double) +define double @f_exp2() { +; CHECK-LABEL: @f_exp2( +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = tail call fast double @exp2(double 1.0) + ret double %res +} + +declare double @llvm.exp2.f64(double) +define double @i_exp2() { +; CHECK-LABEL: @i_exp2( +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = tail call fast double @llvm.exp2.f64(double 1.0) + ret double %res +} + +; FIXME: exp10() is not widely supported. +declare float @exp10f(float) +define float @f_exp10f() { +; CHECK-LABEL: @f_exp10f( +; CHECK-NEXT: [[RES:%.*]] = tail call float @exp10f(float 1.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = tail call float @exp10f(float 1.0) + ret float %res +} + +declare double @log(double) +define double @f_log() { +; CHECK-LABEL: @f_log( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @log(double 1.0) + ret double %res +} + +declare double @llvm.log.f64(double) +define double @i_log() { +; CHECK-LABEL: @i_log( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @llvm.log.f64(double 1.0) + ret double %res +} + +declare float @log2f(float) +define float @f_log2f() { +; CHECK-LABEL: @f_log2f( +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = tail call fast float @log2f(float 1.0) + ret float %res +} + +declare float @llvm.log2.f32(float) +define float @i_log2f() { +; CHECK-LABEL: @i_log2f( +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = tail call fast float @llvm.log2.f32(float 1.0) + ret float %res +} + +declare double @log10(double) +define double @f_log10() { +; CHECK-LABEL: @f_log10( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @log10(double 1.0) + ret double %res +} + +declare float @sinf(float) +define float @f_sinf() { +; CHECK-LABEL: @f_sinf( +; CHECK-NEXT: ret float 0x3FEAED54{{.+}} +; + %res = tail call fast float @sinf(float 1.0) + 
ret float %res +} + +declare double @sinh(double) +define double @f_sinh() { +; CHECK-LABEL: @f_sinh( +; CHECK-NEXT: ret double 0x3FF2CD9F{{.+}} +; + %res = tail call fast double @sinh(double 1.0) + ret double %res +} + +declare float @sqrtf(float) +define float @f_sqrtf() { +; CHECK-LABEL: @f_sqrtf( +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = tail call fast float @sqrtf(float 1.0) + ret float %res +} + +declare double @tan(double) +define double @f_tan() { +; CHECK-LABEL: @f_tan( +; CHECK-NEXT: ret double 0x3FF8EB24{{.+}} +; + %res = tail call fast double @tan(double 1.0) + ret double %res +} + +declare float @tanhf(float) +define float @f_tanhf() { +; CHECK-LABEL: @f_tanhf( +; CHECK-NEXT: [[RES:%.*]] = tail call fast float @tanhf(float 1.000000e+00) +; CHECK-NEXT: ret float 0x3FE85EFA{{.+}} +; + %res = tail call fast float @tanhf(float 1.0) + ret float %res +} diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll b/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll index 1c88bab81a390..9ad794d6094b0 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll @@ -1,109 +1,109 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -early-cse < %s | FileCheck %s - -declare float @nearbyintf(float) #0 -declare float @llvm.nearbyint.f32(float) #0 -declare double @nearbyint(double) #0 -declare double @llvm.nearbyint.f64(double) #0 -declare float @rintf(float) #0 -declare float @llvm.rint.f32(float) #0 -declare double @rint(double) #0 -declare double @llvm.rint.f64(double) #0 - -define float @constant_fold_rint_f32_01() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_01( -; CHECK-NEXT: ret float 1.000000e+00 -; - %x = call float @nearbyintf(float 1.25) #0 - ret float %x -} - -define float @constant_fold_rint_f32_02() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_02( -; CHECK-NEXT: ret float -1.000000e+00 -; - %x = call float @llvm.nearbyint.f32(float -1.25) 
#0 - ret float %x -} - -define float @constant_fold_rint_f32_03() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_03( -; CHECK-NEXT: ret float 2.000000e+00 -; - %x = call float @rintf(float 1.5) #0 - ret float %x -} - -define float @constant_fold_rint_f32_04() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_04( -; CHECK-NEXT: ret float -2.000000e+00 -; - %x = call float @llvm.rint.f32(float -1.5) #0 - ret float %x -} - -define float @constant_fold_rint_f32_05() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_05( -; CHECK-NEXT: ret float 3.000000e+00 -; - %x = call float @nearbyintf(float 2.75) #0 - ret float %x -} - -define float @constant_fold_rint_f32_06() #0 { -; CHECK-LABEL: @constant_fold_rint_f32_06( -; CHECK-NEXT: ret float -3.000000e+00 -; - %x = call float @llvm.nearbyint.f32(float -2.75) #0 - ret float %x -} - -define double @constant_fold_rint_f64_01() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_01( -; CHECK-NEXT: ret double 1.000000e+00 -; - %x = call double @rint(double 1.3) #0 - ret double %x -} - -define double @constant_fold_rint_f64_02() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_02( -; CHECK-NEXT: ret double -1.000000e+00 -; - %x = call double @llvm.rint.f64(double -1.3) #0 - ret double %x -} - -define double @constant_fold_rint_f64_03() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_03( -; CHECK-NEXT: ret double 2.000000e+00 -; - %x = call double @nearbyint(double 1.5) #0 - ret double %x -} - -define double @constant_fold_rint_f64_04() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_04( -; CHECK-NEXT: ret double -2.000000e+00 -; - %x = call double @llvm.nearbyint.f64(double -1.5) #0 - ret double %x -} - -define double @constant_fold_rint_f64_05() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_05( -; CHECK-NEXT: ret double 3.000000e+00 -; - %x = call double @rint(double 2.7) #0 - ret double %x -} - -define double @constant_fold_rint_f64_06() #0 { -; CHECK-LABEL: @constant_fold_rint_f64_06( -; CHECK-NEXT: ret double -3.000000e+00 -; - %x = call double 
@llvm.rint.f64(double -2.7) #0 - ret double %x -} - -attributes #0 = { nounwind readnone } +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse < %s | FileCheck %s + +declare float @nearbyintf(float) #0 +declare float @llvm.nearbyint.f32(float) #0 +declare double @nearbyint(double) #0 +declare double @llvm.nearbyint.f64(double) #0 +declare float @rintf(float) #0 +declare float @llvm.rint.f32(float) #0 +declare double @rint(double) #0 +declare double @llvm.rint.f64(double) #0 + +define float @constant_fold_rint_f32_01() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_01( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @nearbyintf(float 1.25) #0 + ret float %x +} + +define float @constant_fold_rint_f32_02() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_02( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.nearbyint.f32(float -1.25) #0 + ret float %x +} + +define float @constant_fold_rint_f32_03() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_03( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @rintf(float 1.5) #0 + ret float %x +} + +define float @constant_fold_rint_f32_04() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_04( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.rint.f32(float -1.5) #0 + ret float %x +} + +define float @constant_fold_rint_f32_05() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_05( +; CHECK-NEXT: ret float 3.000000e+00 +; + %x = call float @nearbyintf(float 2.75) #0 + ret float %x +} + +define float @constant_fold_rint_f32_06() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_06( +; CHECK-NEXT: ret float -3.000000e+00 +; + %x = call float @llvm.nearbyint.f32(float -2.75) #0 + ret float %x +} + +define double @constant_fold_rint_f64_01() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_01( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @rint(double 1.3) #0 + ret double %x +} + +define double @constant_fold_rint_f64_02() #0 { +; CHECK-LABEL: 
@constant_fold_rint_f64_02( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.rint.f64(double -1.3) #0 + ret double %x +} + +define double @constant_fold_rint_f64_03() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_03( +; CHECK-NEXT: ret double 2.000000e+00 +; + %x = call double @nearbyint(double 1.5) #0 + ret double %x +} + +define double @constant_fold_rint_f64_04() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_04( +; CHECK-NEXT: ret double -2.000000e+00 +; + %x = call double @llvm.nearbyint.f64(double -1.5) #0 + ret double %x +} + +define double @constant_fold_rint_f64_05() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_05( +; CHECK-NEXT: ret double 3.000000e+00 +; + %x = call double @rint(double 2.7) #0 + ret double %x +} + +define double @constant_fold_rint_f64_06() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_06( +; CHECK-NEXT: ret double -3.000000e+00 +; + %x = call double @llvm.rint.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/round.ll b/llvm/test/Transforms/InstSimplify/ConstProp/round.ll index a636160dd12a6..d5b847810d5eb 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/round.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/round.ll @@ -1,92 +1,92 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s - -declare float @roundf(float) #0 -declare float @llvm.round.f32(float) #0 -declare double @round(double) #0 -declare double @llvm.round.f64(double) #0 - -; CHECK-LABEL: @constant_fold_round_f32_01 -; CHECK-NEXT: ret float 1.000000e+00 -define float @constant_fold_round_f32_01() #0 { - %x = call float @roundf(float 1.25) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f32_02 -; CHECK-NEXT: ret float -1.000000e+00 -define float @constant_fold_round_f32_02() #0 { - %x = call float @llvm.round.f32(float -1.25) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f32_03 -; CHECK-NEXT: ret float 2.000000e+00 -define float 
@constant_fold_round_f32_03() #0 { - %x = call float @roundf(float 1.5) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f32_04 -; CHECK-NEXT: ret float -2.000000e+00 -define float @constant_fold_round_f32_04() #0 { - %x = call float @llvm.round.f32(float -1.5) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f32_05 -; CHECK-NEXT: ret float 3.000000e+00 -define float @constant_fold_round_f32_05() #0 { - %x = call float @roundf(float 2.75) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f32_06 -; CHECK-NEXT: ret float -3.000000e+00 -define float @constant_fold_round_f32_06() #0 { - %x = call float @llvm.round.f32(float -2.75) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_round_f64_01 -; CHECK-NEXT: ret double 1.000000e+00 -define double @constant_fold_round_f64_01() #0 { - %x = call double @round(double 1.3) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_round_f64_02 -; CHECK-NEXT: ret double -1.000000e+00 -define double @constant_fold_round_f64_02() #0 { - %x = call double @llvm.round.f64(double -1.3) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_round_f64_03 -; CHECK-NEXT: ret double 2.000000e+00 -define double @constant_fold_round_f64_03() #0 { - %x = call double @round(double 1.5) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_round_f64_04 -; CHECK-NEXT: ret double -2.000000e+00 -define double @constant_fold_round_f64_04() #0 { - %x = call double @llvm.round.f64(double -1.5) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_round_f64_05 -; CHECK-NEXT: ret double 3.000000e+00 -define double @constant_fold_round_f64_05() #0 { - %x = call double @round(double 2.7) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_round_f64_06 -; CHECK-NEXT: ret double -3.000000e+00 -define double @constant_fold_round_f64_06() #0 { - %x = call double @llvm.round.f64(double -2.7) #0 - ret double %x -} - -attributes #0 = { nounwind readnone } +; RUN: opt -S -early-cse < %s | FileCheck %s + +declare float 
@roundf(float) #0 +declare float @llvm.round.f32(float) #0 +declare double @round(double) #0 +declare double @llvm.round.f64(double) #0 + +; CHECK-LABEL: @constant_fold_round_f32_01 +; CHECK-NEXT: ret float 1.000000e+00 +define float @constant_fold_round_f32_01() #0 { + %x = call float @roundf(float 1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_02 +; CHECK-NEXT: ret float -1.000000e+00 +define float @constant_fold_round_f32_02() #0 { + %x = call float @llvm.round.f32(float -1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_03 +; CHECK-NEXT: ret float 2.000000e+00 +define float @constant_fold_round_f32_03() #0 { + %x = call float @roundf(float 1.5) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_04 +; CHECK-NEXT: ret float -2.000000e+00 +define float @constant_fold_round_f32_04() #0 { + %x = call float @llvm.round.f32(float -1.5) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_05 +; CHECK-NEXT: ret float 3.000000e+00 +define float @constant_fold_round_f32_05() #0 { + %x = call float @roundf(float 2.75) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_06 +; CHECK-NEXT: ret float -3.000000e+00 +define float @constant_fold_round_f32_06() #0 { + %x = call float @llvm.round.f32(float -2.75) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f64_01 +; CHECK-NEXT: ret double 1.000000e+00 +define double @constant_fold_round_f64_01() #0 { + %x = call double @round(double 1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_02 +; CHECK-NEXT: ret double -1.000000e+00 +define double @constant_fold_round_f64_02() #0 { + %x = call double @llvm.round.f64(double -1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_03 +; CHECK-NEXT: ret double 2.000000e+00 +define double @constant_fold_round_f64_03() #0 { + %x = call double @round(double 1.5) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_04 +; CHECK-NEXT: ret double -2.000000e+00 
+define double @constant_fold_round_f64_04() #0 { + %x = call double @llvm.round.f64(double -1.5) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_05 +; CHECK-NEXT: ret double 3.000000e+00 +define double @constant_fold_round_f64_05() #0 { + %x = call double @round(double 2.7) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_06 +; CHECK-NEXT: ret double -3.000000e+00 +define double @constant_fold_round_f64_06() #0 { + %x = call double @llvm.round.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll b/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll index 68d20002fe4ab..df13809171454 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll @@ -1,105 +1,105 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -early-cse < %s | FileCheck %s - -declare float @truncf(float) #0 -declare float @llvm.trunc.f32(float) #0 -declare double @trunc(double) #0 -declare double @llvm.trunc.f64(double) #0 - -define float @constant_fold_trunc_f32_01() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_01( -; CHECK-NEXT: ret float 1.000000e+00 -; - %x = call float @truncf(float 1.25) #0 - ret float %x -} - -define float @constant_fold_trunc_f32_02() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_02( -; CHECK-NEXT: ret float -1.000000e+00 -; - %x = call float @llvm.trunc.f32(float -1.25) #0 - ret float %x -} - -define float @constant_fold_trunc_f32_03() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_03( -; CHECK-NEXT: ret float 1.000000e+00 -; - %x = call float @truncf(float 1.5) #0 - ret float %x -} - -define float @constant_fold_trunc_f32_04() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_04( -; CHECK-NEXT: ret float -1.000000e+00 -; - %x = call float @llvm.trunc.f32(float -1.5) #0 - ret float %x -} - -define float @constant_fold_trunc_f32_05() #0 { -; 
CHECK-LABEL: @constant_fold_trunc_f32_05( -; CHECK-NEXT: ret float 2.000000e+00 -; - %x = call float @truncf(float 2.75) #0 - ret float %x -} - -define float @constant_fold_trunc_f32_06() #0 { -; CHECK-LABEL: @constant_fold_trunc_f32_06( -; CHECK-NEXT: ret float -2.000000e+00 -; - %x = call float @llvm.trunc.f32(float -2.75) #0 - ret float %x -} - -define double @constant_fold_trunc_f64_01() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_01( -; CHECK-NEXT: ret double 1.000000e+00 -; - %x = call double @trunc(double 1.3) #0 - ret double %x -} - -define double @constant_fold_trunc_f64_02() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_02( -; CHECK-NEXT: ret double -1.000000e+00 -; - %x = call double @llvm.trunc.f64(double -1.3) #0 - ret double %x -} - -define double @constant_fold_trunc_f64_03() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_03( -; CHECK-NEXT: ret double 1.000000e+00 -; - %x = call double @trunc(double 1.5) #0 - ret double %x -} - -define double @constant_fold_trunc_f64_04() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_04( -; CHECK-NEXT: ret double -1.000000e+00 -; - %x = call double @llvm.trunc.f64(double -1.5) #0 - ret double %x -} - -define double @constant_fold_trunc_f64_05() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_05( -; CHECK-NEXT: ret double 2.000000e+00 -; - %x = call double @trunc(double 2.7) #0 - ret double %x -} - -define double @constant_fold_trunc_f64_06() #0 { -; CHECK-LABEL: @constant_fold_trunc_f64_06( -; CHECK-NEXT: ret double -2.000000e+00 -; - %x = call double @llvm.trunc.f64(double -2.7) #0 - ret double %x -} - -attributes #0 = { nounwind readnone } +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse < %s | FileCheck %s + +declare float @truncf(float) #0 +declare float @llvm.trunc.f32(float) #0 +declare double @trunc(double) #0 +declare double @llvm.trunc.f64(double) #0 + +define float @constant_fold_trunc_f32_01() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_01( +; 
CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @truncf(float 1.25) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_02() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_02( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.trunc.f32(float -1.25) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_03() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_03( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @truncf(float 1.5) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_04() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_04( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.trunc.f32(float -1.5) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_05() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_05( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @truncf(float 2.75) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_06() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_06( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.trunc.f32(float -2.75) #0 + ret float %x +} + +define double @constant_fold_trunc_f64_01() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_01( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @trunc(double 1.3) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_02() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_02( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.trunc.f64(double -1.3) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_03() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_03( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @trunc(double 1.5) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_04() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_04( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.trunc.f64(double -1.5) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_05() #0 { +; CHECK-LABEL: 
@constant_fold_trunc_f64_05( +; CHECK-NEXT: ret double 2.000000e+00 +; + %x = call double @trunc(double 2.7) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_06() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_06( +; CHECK-NEXT: ret double -2.000000e+00 +; + %x = call double @llvm.trunc.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll index 6ac6564a08a97..8e90961928c98 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll @@ -1,32 +1,32 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s - -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64" - -; CHECK-LABEL: define <4 x i32*> @fixed_length_version_first() { -; CHECK-NEXT: ret <4 x i32*> undef -define <4 x i32*> @fixed_length_version_first() { - %ptr = getelementptr i32, <4 x i32*> undef, <4 x i64> undef - ret <4 x i32*> %ptr -} - -; CHECK-LABEL: define <4 x <4 x i32>*> @fixed_length_version_second() { -; CHECK-NEXT: ret <4 x <4 x i32>*> undef -define <4 x <4 x i32>*> @fixed_length_version_second() { - %ptr = getelementptr <4 x i32>, <4 x i32>* undef, <4 x i64> undef - ret <4 x <4 x i32>*> %ptr -} - -; CHECK-LABEL: define @vscale_version_first() { -; CHECK-NEXT: ret undef -define @vscale_version_first() { - %ptr = getelementptr i32, undef, undef - ret %ptr -} - -; CHECK-LABEL: define *> @vscale_version_second() { -; CHECK-NEXT: ret *> undef -define *> @vscale_version_second() { - %ptr = getelementptr , * undef, undef - ret *> %ptr -} +; RUN: opt -early-cse -S < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +; CHECK-LABEL: define <4 x i32*> @fixed_length_version_first() { +; CHECK-NEXT: ret <4 x i32*> 
undef +define <4 x i32*> @fixed_length_version_first() { + %ptr = getelementptr i32, <4 x i32*> undef, <4 x i64> undef + ret <4 x i32*> %ptr +} + +; CHECK-LABEL: define <4 x <4 x i32>*> @fixed_length_version_second() { +; CHECK-NEXT: ret <4 x <4 x i32>*> undef +define <4 x <4 x i32>*> @fixed_length_version_second() { + %ptr = getelementptr <4 x i32>, <4 x i32>* undef, <4 x i64> undef + ret <4 x <4 x i32>*> %ptr +} + +; CHECK-LABEL: define @vscale_version_first() { +; CHECK-NEXT: ret undef +define @vscale_version_first() { + %ptr = getelementptr i32, undef, undef + ret %ptr +} + +; CHECK-LABEL: define *> @vscale_version_second() { +; CHECK-NEXT: ret *> undef +define *> @vscale_version_second() { + %ptr = getelementptr , * undef, undef + ret *> %ptr +} From b53243e19496ae59ae1ce457a40e3ea610011fd1 Mon Sep 17 00:00:00 2001 From: Dokyung Song Date: Tue, 1 Sep 2020 16:22:59 +0000 Subject: [PATCH 145/465] [libFuzzer] Evenly select inputs to cross over with from the corpus regardless of the input's coverage. This patch adds an option "cross_over_uniform_dist", which, if 1, considers all inputs in the corpus for the crossover input selection. More specifically, this patch uses a uniform distribution of all inputs in the corpus for the CrossOver input selection. Note that input selection for mutation is still fully determined by the scheduling policy (i.e., vanilla or Entropic); the uniform distribution only applies to the secondary input selection, only for the crossover mutation of the base input chosen by the scheduling policy. This way the corpus inputs that have useful fragments in them, even though they are deprioritized by the scheduling policy, have chances of getting mixed with other inputs that are prioritized and selected as base input for mutation. 
Differential Revision: https://reviews.llvm.org/D86954 --- compiler-rt/lib/fuzzer/FuzzerCorpus.h | 9 +++++++++ compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 1 + compiler-rt/lib/fuzzer/FuzzerFlags.def | 10 ++++++++++ compiler-rt/lib/fuzzer/FuzzerLoop.cpp | 7 +++++-- compiler-rt/lib/fuzzer/FuzzerOptions.h | 1 + .../test/fuzzer/cross_over_uniform_dist.test | 16 ++++++++++++++++ 6 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 compiler-rt/test/fuzzer/cross_over_uniform_dist.test diff --git a/compiler-rt/lib/fuzzer/FuzzerCorpus.h b/compiler-rt/lib/fuzzer/FuzzerCorpus.h index fd8ff6ca427dd..6b45d37a4b140 100644 --- a/compiler-rt/lib/fuzzer/FuzzerCorpus.h +++ b/compiler-rt/lib/fuzzer/FuzzerCorpus.h @@ -270,6 +270,15 @@ class InputCorpus { return II; } + InputInfo &ChooseUnitToCrossOverWith(Random &Rand, bool UniformDist) { + if (!UniformDist) { + return ChooseUnitToMutate(Rand); + } + InputInfo &II = *Inputs[Rand(Inputs.size())]; + assert(!II.U.empty()); + return II; + } + // Returns an index of random unit from the corpus to mutate. 
size_t ChooseUnitIdxToMutate(Random &Rand) { UpdateCorpusDistribution(Rand); diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index a13fb03a7fc1c..93b1ff658e42e 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -680,6 +680,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.IgnoreCrashes = Flags.ignore_crashes; Options.MaxTotalTimeSec = Flags.max_total_time; Options.DoCrossOver = Flags.cross_over; + Options.CrossOverUniformDist = Flags.cross_over_uniform_dist; Options.MutateDepth = Flags.mutate_depth; Options.ReduceDepth = Flags.reduce_depth; Options.UseCounters = Flags.use_counters; diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def index 1dc805d4e539d..5194666634b43 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -28,6 +28,16 @@ FUZZER_FLAG_INT(keep_seed, 0, "If 1, keep seed inputs in the corpus even if " "seed inputs will never be reduced. This option can be useful when seeds are" "not properly formed for the fuzz target but still have useful snippets.") FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") +FUZZER_FLAG_INT(cross_over_uniform_dist, 0, "Experimental. If 1, use a " + "uniform probability distribution when choosing inputs to cross over with. " + "Some of the inputs in the corpus may never get chosen for mutation " + "depending on the input mutation scheduling policy. With this flag, all " + "inputs, regardless of the input mutation scheduling policy, can be chosen " + "as an input to cross over with. 
This can be particularly useful with " + "|keep_seed==1|; all the initial seed inputs, even though they do not " + "increase coverage because they are not properly formed, will still be " + "chosen as an input to cross over with.") + FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") FUZZER_FLAG_INT(reduce_depth, 0, "Experimental/internal. " diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index d76075b0a81a3..f97b4d2f7f015 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -666,8 +666,11 @@ void Fuzzer::MutateAndTestOne() { MD.StartMutationSequence(); auto &II = Corpus.ChooseUnitToMutate(MD.GetRand()); - if (Options.DoCrossOver) - MD.SetCrossOverWith(&Corpus.ChooseUnitToMutate(MD.GetRand()).U); + if (Options.DoCrossOver) { + auto &CrossOverII = Corpus.ChooseUnitToCrossOverWith( + MD.GetRand(), Options.CrossOverUniformDist); + MD.SetCrossOverWith(&CrossOverII.U); + } const auto &U = II.U; memcpy(BaseSha1, II.Sha1, sizeof(BaseSha1)); assert(CurrentUnitData); diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h index 26961973d50bb..281658dadf04b 100644 --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -31,6 +31,7 @@ struct FuzzingOptions { int RssLimitMb = 0; int MallocLimitMb = 0; bool DoCrossOver = true; + bool CrossOverUniformDist = false; int MutateDepth = 5; bool ReduceDepth = false; bool UseCounters = false; diff --git a/compiler-rt/test/fuzzer/cross_over_uniform_dist.test b/compiler-rt/test/fuzzer/cross_over_uniform_dist.test new file mode 100644 index 0000000000000..0dff5fd628f37 --- /dev/null +++ b/compiler-rt/test/fuzzer/cross_over_uniform_dist.test @@ -0,0 +1,16 @@ +REQUIRES: linux, x86_64 +RUN: %cpp_compiler %S/KeepSeedTest.cpp -o %t-CrossOverUniformDistTest + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n "@SELECT" > %t-corpus/A 
+RUN: echo -n "@FROM WHERE" > %t-corpus/B + +RUN: not %run %t-CrossOverUniformDistTest -keep_seed=1 -cross_over_uniform_dist=1 -seed=1 -runs=2000000 %t-corpus 2>&1 | FileCheck %s +CHECK: BINGO + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n "@SELECT" > %t-corpus/A +RUN: echo -n "@FROM WHERE" > %t-corpus/B +RUN: %run %t-CrossOverUniformDistTest -keep_seed=1 -seed=1 -runs=2000000 %t-corpus 2>&1 From c9771391ce05e5cba00e29017fd6c39157df3f3c Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 2 Sep 2020 21:54:27 -0700 Subject: [PATCH 146/465] [NewPM][Lint] Port -lint to NewPM This also changes -lint from an analysis to a pass. It's similar to -verify, and that is a normal pass, and lives in llvm/IR. Reviewed By: ychen Differential Revision: https://reviews.llvm.org/D87057 --- llvm/include/llvm/Analysis/Lint.h | 28 +-- llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/LinkAllPasses.h | 2 +- llvm/lib/Analysis/Analysis.cpp | 2 +- llvm/lib/Analysis/Lint.cpp | 304 +++++++++++++++------------ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/test/Other/lint.ll | 1 + 8 files changed, 184 insertions(+), 157 deletions(-) diff --git a/llvm/include/llvm/Analysis/Lint.h b/llvm/include/llvm/Analysis/Lint.h index 0fea81e215c91..6eb637e727821 100644 --- a/llvm/include/llvm/Analysis/Lint.h +++ b/llvm/include/llvm/Analysis/Lint.h @@ -19,30 +19,30 @@ #ifndef LLVM_ANALYSIS_LINT_H #define LLVM_ANALYSIS_LINT_H +#include "llvm/IR/PassManager.h" + namespace llvm { class FunctionPass; class Module; class Function; -/// Create a lint pass. -/// -/// Check a module or function. -FunctionPass *createLintPass(); +FunctionPass *createLintLegacyPassPass(); -/// Check a module. +/// Lint a module. /// /// This should only be used for debugging, because it plays games with /// PassManagers and stuff. -void lintModule( - const Module &M ///< The module to be checked -); +void lintModule(const Module &M); + +// Lint a function. 
+void lintFunction(const Function &F); -// lintFunction - Check a function. -void lintFunction( - const Function &F ///< The function to be checked -); +class LintPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; -} // End llvm namespace +} // namespace llvm -#endif +#endif // LLVM_ANALYSIS_LINT_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 7e512ba56c728..63ae19d8495db 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -218,7 +218,7 @@ void initializeLegalizerPass(PassRegistry&); void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &); void initializeGISelKnownBitsAnalysisPass(PassRegistry &); void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&); -void initializeLintPass(PassRegistry&); +void initializeLintLegacyPassPass(PassRegistry &); void initializeLiveDebugValuesPass(PassRegistry&); void initializeLiveDebugVariablesPass(PassRegistry&); void initializeLiveIntervalsPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index dfd0e9c8da705..59284eecfbc7f 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -203,7 +203,7 @@ namespace { (void) llvm::createPrintFunctionPass(os); (void) llvm::createModuleDebugInfoPrinterPass(); (void) llvm::createPartialInliningPass(); - (void) llvm::createLintPass(); + (void) llvm::createLintLegacyPassPass(); (void) llvm::createSinkingPass(); (void) llvm::createLowerAtomicPass(); (void) llvm::createCorrelatedValuePropagationPass(); diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index a9ece42df8563..0496e23195d57 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -57,7 +57,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeLazyValueInfoWrapperPassPass(Registry); 
initializeLazyValueInfoPrinterPass(Registry); initializeLegacyDivergenceAnalysisPass(Registry); - initializeLintPass(Registry); + initializeLintLegacyPassPass(Registry); initializeLoopInfoWrapperPassPass(Registry); initializeMemDepPrinterPass(Registry); initializeMemDerefPrinterPass(Registry); diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index 4a159d6035f0d..04e04a8053e87 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -63,6 +63,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" @@ -80,134 +81,102 @@ using namespace llvm; namespace { - namespace MemRef { - static const unsigned Read = 1; - static const unsigned Write = 2; - static const unsigned Callee = 4; - static const unsigned Branchee = 8; - } // end namespace MemRef - - class Lint : public FunctionPass, public InstVisitor { - friend class InstVisitor; - - void visitFunction(Function &F); - - void visitCallBase(CallBase &CB); - void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, - MaybeAlign Alignment, Type *Ty, unsigned Flags); - void visitEHBeginCatch(IntrinsicInst *II); - void visitEHEndCatch(IntrinsicInst *II); - - void visitReturnInst(ReturnInst &I); - void visitLoadInst(LoadInst &I); - void visitStoreInst(StoreInst &I); - void visitXor(BinaryOperator &I); - void visitSub(BinaryOperator &I); - void visitLShr(BinaryOperator &I); - void visitAShr(BinaryOperator &I); - void visitShl(BinaryOperator &I); - void visitSDiv(BinaryOperator &I); - void visitUDiv(BinaryOperator &I); - void visitSRem(BinaryOperator &I); - void visitURem(BinaryOperator &I); - void visitAllocaInst(AllocaInst &I); - void visitVAArgInst(VAArgInst &I); - void visitIndirectBrInst(IndirectBrInst &I); - void visitExtractElementInst(ExtractElementInst &I); - void 
visitInsertElementInst(InsertElementInst &I); - void visitUnreachableInst(UnreachableInst &I); - - Value *findValue(Value *V, bool OffsetOk) const; - Value *findValueImpl(Value *V, bool OffsetOk, - SmallPtrSetImpl &Visited) const; - - public: - Module *Mod; - const DataLayout *DL; - AliasAnalysis *AA; - AssumptionCache *AC; - DominatorTree *DT; - TargetLibraryInfo *TLI; - - std::string Messages; - raw_string_ostream MessagesStr; - - static char ID; // Pass identification, replacement for typeid - Lint() : FunctionPass(ID), MessagesStr(Messages) { - initializeLintPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - void print(raw_ostream &O, const Module *M) const override {} - - void WriteValues(ArrayRef Vs) { - for (const Value *V : Vs) { - if (!V) - continue; - if (isa(V)) { - MessagesStr << *V << '\n'; - } else { - V->printAsOperand(MessagesStr, true, Mod); - MessagesStr << '\n'; - } +namespace MemRef { +static const unsigned Read = 1; +static const unsigned Write = 2; +static const unsigned Callee = 4; +static const unsigned Branchee = 8; +} // end namespace MemRef + +class Lint : public InstVisitor { + friend class InstVisitor; + + void visitFunction(Function &F); + + void visitCallBase(CallBase &CB); + void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, + MaybeAlign Alignment, Type *Ty, unsigned Flags); + void visitEHBeginCatch(IntrinsicInst *II); + void visitEHEndCatch(IntrinsicInst *II); + + void visitReturnInst(ReturnInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitXor(BinaryOperator &I); + void visitSub(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + void visitShl(BinaryOperator &I); + void visitSDiv(BinaryOperator &I); + void 
visitUDiv(BinaryOperator &I); + void visitSRem(BinaryOperator &I); + void visitURem(BinaryOperator &I); + void visitAllocaInst(AllocaInst &I); + void visitVAArgInst(VAArgInst &I); + void visitIndirectBrInst(IndirectBrInst &I); + void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitUnreachableInst(UnreachableInst &I); + + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, + SmallPtrSetImpl &Visited) const; + +public: + Module *Mod; + const DataLayout *DL; + AliasAnalysis *AA; + AssumptionCache *AC; + DominatorTree *DT; + TargetLibraryInfo *TLI; + + std::string Messages; + raw_string_ostream MessagesStr; + + Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, + AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) + : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI), + MessagesStr(Messages) {} + + void WriteValues(ArrayRef Vs) { + for (const Value *V : Vs) { + if (!V) + continue; + if (isa(V)) { + MessagesStr << *V << '\n'; + } else { + V->printAsOperand(MessagesStr, true, Mod); + MessagesStr << '\n'; } } + } - /// A check failed, so printout out the condition and the message. - /// - /// This provides a nice place to put a breakpoint if you want to see why - /// something is not correct. - void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } - - /// A check failed (with values to print). - /// - /// This calls the Message-only version so that the above is easier to set - /// a breakpoint on. - template - void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) { - CheckFailed(Message); - WriteValues({V1, Vs...}); - } - }; + /// A check failed, so printout out the condition and the message. + /// + /// This provides a nice place to put a breakpoint if you want to see why + /// something is not correct. 
+ void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } + + /// A check failed (with values to print). + /// + /// This calls the Message-only version so that the above is easier to set + /// a breakpoint on. + template + void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) { + CheckFailed(Message); + WriteValues({V1, Vs...}); + } +}; } // end anonymous namespace -char Lint::ID = 0; -INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", - false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", - false, true) - // Assert - We know that cond should be true, if not print an error message. -#define Assert(C, ...) \ - do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false) - -// Lint::run - This is the main Analysis entry point for a -// function. -// -bool Lint::runOnFunction(Function &F) { - Mod = F.getParent(); - DL = &F.getParent()->getDataLayout(); - AA = &getAnalysis().getAAResults(); - AC = &getAnalysis().getAssumptionCache(F); - DT = &getAnalysis().getDomTree(); - TLI = &getAnalysis().getTLI(F); - visit(F); - dbgs() << MessagesStr.str(); - Messages.clear(); - return false; -} +#define Assert(C, ...) \ + do { \ + if (!(C)) { \ + CheckFailed(__VA_ARGS__); \ + return; \ + } \ + } while (false) void Lint::visitFunction(Function &F) { // This isn't undefined behavior, it's just a little unusual, and it's a @@ -281,8 +250,7 @@ void Lint::visitCallBase(CallBase &I) { // Check that an sret argument points to valid memory. 
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { - Type *Ty = - cast(Formal->getType())->getElementType(); + Type *Ty = cast(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), DL->getABITypeAlign(Ty), Ty, MemRef::Read | MemRef::Write); @@ -309,12 +277,12 @@ void Lint::visitCallBase(CallBase &I) { } } - if (IntrinsicInst *II = dyn_cast(&I)) switch (II->getIntrinsicID()) { - default: break; + default: + break; - // TODO: Check more intrinsics + // TODO: Check more intrinsics case Intrinsic::memcpy: { MemCpyInst *MCI = cast(&I); @@ -553,7 +521,8 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, VectorType *VecTy = dyn_cast(V->getType()); if (!VecTy) { - KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast(V), DT); + KnownBits Known = + computeKnownBits(V, DL, 0, AC, dyn_cast(V), DT); return Known.isZero(); } @@ -682,11 +651,13 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (!VisitedBlocks.insert(BB).second) break; if (Value *U = - FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) + FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) return findValueImpl(U, OffsetOk, Visited); - if (BBI != BB->begin()) break; + if (BBI != BB->begin()) + break; BB = BB->getUniquePredecessor(); - if (!BB) break; + if (!BB) + break; BBI = BB->end(); } } else if (PHINode *PN = dyn_cast(V)) { @@ -696,8 +667,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (CI->isNoopCast(*DL)) return findValueImpl(CI->getOperand(0), OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast(V)) { - if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), - Ex->getIndices())) + if (Value *W = + FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) if (W != V) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast(V)) { @@ -728,22 +699,75 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, return V; } +PreservedAnalyses 
LintPass::run(Function &F, FunctionAnalysisManager &AM) { + auto *Mod = F.getParent(); + auto *DL = &F.getParent()->getDataLayout(); + auto *AA = &AM.getResult(F); + auto *AC = &AM.getResult(F); + auto *DT = &AM.getResult(F); + auto *TLI = &AM.getResult(F); + Lint L(Mod, DL, AA, AC, DT, TLI); + L.visit(F); + dbgs() << L.MessagesStr.str(); + return PreservedAnalyses::all(); +} + +class LintLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + LintLegacyPass() : FunctionPass(ID) { + initializeLintLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + void print(raw_ostream &O, const Module *M) const override {} +}; + +char LintLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", + false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", + false, true) + +bool LintLegacyPass::runOnFunction(Function &F) { + auto *Mod = F.getParent(); + auto *DL = &F.getParent()->getDataLayout(); + auto *AA = &getAnalysis().getAAResults(); + auto *AC = &getAnalysis().getAssumptionCache(F); + auto *DT = &getAnalysis().getDomTree(); + auto *TLI = &getAnalysis().getTLI(F); + Lint L(Mod, DL, AA, AC, DT, TLI); + L.visit(F); + dbgs() << L.MessagesStr.str(); + return false; +} + //===----------------------------------------------------------------------===// // Implement the public interfaces to this file... 
//===----------------------------------------------------------------------===// -FunctionPass *llvm::createLintPass() { - return new Lint(); -} +FunctionPass *llvm::createLintLegacyPassPass() { return new LintLegacyPass(); } /// lintFunction - Check a function for errors, printing messages on stderr. /// void llvm::lintFunction(const Function &f) { - Function &F = const_cast(f); + Function &F = const_cast(f); assert(!F.isDeclaration() && "Cannot lint external functions"); legacy::FunctionPassManager FPM(F.getParent()); - Lint *V = new Lint(); + auto *V = new LintLegacyPass(); FPM.add(V); FPM.run(F); } @@ -752,7 +776,7 @@ void llvm::lintFunction(const Function &f) { /// void llvm::lintModule(const Module &M) { legacy::PassManager PM; - Lint *V = new Lint(); + auto *V = new LintLegacyPass(); PM.add(V); - PM.run(const_cast(M)); + PM.run(const_cast(M)); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 3a6b736dae3cf..9df6a985789ea 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -39,6 +39,7 @@ #include "llvm/Analysis/InstCount.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/Lint.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 12e04ad91128d..b0d1d2a63a830 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -205,6 +205,7 @@ FUNCTION_PASS("irce", IRCEPass()) FUNCTION_PASS("float2int", Float2IntPass()) FUNCTION_PASS("no-op-function", NoOpFunctionPass()) FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass()) +FUNCTION_PASS("lint", LintPass()) FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings()) FUNCTION_PASS("loweratomic", LowerAtomicPass()) FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) diff --git 
a/llvm/test/Other/lint.ll b/llvm/test/Other/lint.ll index 45c8bd55fa014..a156301c1c26b 100644 --- a/llvm/test/Other/lint.ll +++ b/llvm/test/Other/lint.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -lint -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes=lint -disable-output < %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64" declare fastcc void @bar() From 7fff1fbd3ce1c069aff0f475e896d50a39deb1ac Mon Sep 17 00:00:00 2001 From: Puyan Lotfi Date: Thu, 3 Sep 2020 15:38:52 -0400 Subject: [PATCH 147/465] [MIRVRegNamer] Experimental MachineInstr stable hashing (Fowler-Noll-Vo) This hashing scheme has been useful out of tree, and I want to start experimenting with it. Specifically I want to experiment on the MIRVRegNamer, MIRCanononicalizer, and eventually the MachineOutliner. This diff is a first step, that optionally brings stable hashing to the MIRVRegNamer (and as a result, the MIRCanonicalizer). We've tested this hashing scheme on a lot of MachineOperand types that llvm::hash_value can not handle in a stable manner. 
This stable hashing was also the basis for "Global Machine Outliner for ThinLTO" in EuroLLVM 2020 http://llvm.org/devmtg/2020-04/talks.html#TechTalk_58 Credits: Kyungwoo Lee, Nikolai Tillmann Differential Revision: https://reviews.llvm.org/D86952 --- llvm/include/llvm/CodeGen/MachineOperand.h | 5 + llvm/include/llvm/CodeGen/MachineStableHash.h | 28 +++ llvm/include/llvm/CodeGen/StableHashing.h | 112 ++++++++++ llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 14 ++ llvm/lib/CodeGen/MachineOperand.cpp | 7 +- llvm/lib/CodeGen/MachineStableHash.cpp | 193 ++++++++++++++++++ .../MIR/AArch64/addrspace-memoperands.mir | 5 +- .../AArch64/mir-canon-constant-pool-hash.mir | 1 + .../MIR/AArch64/mir-canon-jump-table.mir | 1 + .../MIR/AArch64/mirCanonCopyCopyProp.mir | 1 + .../MIR/AArch64/mirCanonIdempotent.mir | 1 + llvm/test/CodeGen/MIR/AArch64/mirnamer.mir | 2 +- .../CodeGen/MIR/AMDGPU/mir-canon-multi.mir | 3 +- .../MIR/AMDGPU/mircanon-memoperands.mir | 3 +- .../MIR/AMDGPU/parse-order-reserved-regs.mir | 1 + .../MIR/Generic/CFPImmMIRCanonHash.mir | 3 +- .../CodeGen/MIR/X86/mir-canon-hash-bb.mir | 2 + .../MIR/X86/mir-namer-hash-frameindex.mir | 1 + llvm/test/CodeGen/MIR/X86/mircanon-flags.mir | 3 +- 20 files changed, 379 insertions(+), 8 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/MachineStableHash.h create mode 100644 llvm/include/llvm/CodeGen/StableHashing.h create mode 100644 llvm/lib/CodeGen/MachineStableHash.cpp diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h index 0f252137364cf..c4fe67c419cd6 100644 --- a/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/llvm/include/llvm/CodeGen/MachineOperand.h @@ -759,6 +759,11 @@ class MachineOperand { bool isKill = false, bool isDead = false, bool isUndef = false, bool isDebug = false); + /// getTargetIndexName - If this MachineOperand is a TargetIndex that has a + /// name, attempt to get the name. 
Returns nullptr if the TargetIndex does not + /// have a name. Asserts if MO is not a TargetIndex. + const char *getTargetIndexName() const; + //===--------------------------------------------------------------------===// // Construction methods. //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/MachineStableHash.h b/llvm/include/llvm/CodeGen/MachineStableHash.h new file mode 100644 index 0000000000000..19bd28a794776 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MachineStableHash.h @@ -0,0 +1,28 @@ +//===------------ MIRVRegNamerUtils.h - MIR VReg Renaming Utilities -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Stable hashing for MachineInstr and MachineOperand. Useful or getting a +// hash across runs, modules, etc. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINESTABLEHASH_H +#define LLVM_CODEGEN_MACHINESTABLEHASH_H + +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/StableHashing.h" + +namespace llvm { +stable_hash stableHashValue(const MachineOperand &MO); +stable_hash stableHashValue(const MachineInstr &MI, bool HashVRegs = false, + bool HashConstantPoolIndices = false, + bool HashMemOperands = false); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/CodeGen/StableHashing.h b/llvm/include/llvm/CodeGen/StableHashing.h new file mode 100644 index 0000000000000..c6113aa93c800 --- /dev/null +++ b/llvm/include/llvm/CodeGen/StableHashing.h @@ -0,0 +1,112 @@ +//===- llvm/CodeGen/StableHashing.h - Utilities for stable hashing * C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides types and functions for computing and combining stable +// hashes. Stable hashes can be useful for hashing across different modules, +// processes, or compiler runs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_STABLEHASHING_H +#define LLVM_CODEGEN_STABLEHASHING_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { + +/// An opaque object representing a stable hash code. It can be serialized, +/// deserialized, and is stable across processes and executions. +using stable_hash = uint64_t; + +// Implementation details +namespace hashing { +namespace detail { + +// Stable hashes are based on the 64-bit FNV-1 hash: +// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function + +const uint64_t FNV_PRIME_64 = 1099511628211u; +const uint64_t FNV_OFFSET_64 = 14695981039346656037u; + +inline void stable_hash_append(stable_hash &Hash, const char Value) { + Hash = Hash ^ (Value & 0xFF); + Hash = Hash * FNV_PRIME_64; +} + +inline void stable_hash_append(stable_hash &Hash, stable_hash Value) { + for (unsigned I = 0; I < 8; ++I) { + stable_hash_append(Hash, (const char)Value); + Value >>= 8; + } +} + +} // namespace detail +} // namespace hashing + +inline stable_hash stable_hash_combine(stable_hash A, stable_hash B) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + hashing::detail::stable_hash_append(Hash, A); + hashing::detail::stable_hash_append(Hash, B); + return Hash; +} + +inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, + stable_hash C) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + hashing::detail::stable_hash_append(Hash, A); + hashing::detail::stable_hash_append(Hash, B); + hashing::detail::stable_hash_append(Hash, C); + return Hash; 
+} + +inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, + stable_hash C, stable_hash D) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + hashing::detail::stable_hash_append(Hash, A); + hashing::detail::stable_hash_append(Hash, B); + hashing::detail::stable_hash_append(Hash, C); + hashing::detail::stable_hash_append(Hash, D); + return Hash; +} + +/// Compute a stable_hash for a sequence of values. +/// +/// This hashes a sequence of values. It produces the same stable_hash as +/// 'stable_hash_combine(a, b, c, ...)', but can run over arbitrary sized +/// sequences and is significantly faster given pointers and types which +/// can be hashed as a sequence of bytes. +template +stable_hash stable_hash_combine_range(InputIteratorT First, + InputIteratorT Last) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + for (auto I = First; I != Last; ++I) + hashing::detail::stable_hash_append(Hash, *I); + return Hash; +} + +inline stable_hash stable_hash_combine_array(const stable_hash *P, size_t C) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + for (size_t I = 0; I < C; ++I) + hashing::detail::stable_hash_append(Hash, P[I]); + return Hash; +} + +inline stable_hash stable_hash_combine_string(const StringRef &S) { + return stable_hash_combine_range(S.begin(), S.end()); +} + +inline stable_hash stable_hash_combine_string(const char *C) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + while (*C) + hashing::detail::stable_hash_append(Hash, *(C++)); + return Hash; +} + +} // namespace llvm + +#endif diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 40512013e4e10..617692a347922 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -136,6 +136,7 @@ add_llvm_component_library(LLVMCodeGen RegisterPressure.cpp RegisterScavenging.cpp RenameIndependentSubregs.cpp + MachineStableHash.cpp MIRVRegNamerUtils.cpp MIRNamerPass.cpp MIRCanonicalizerPass.cpp diff --git 
a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index 54441301d65b1..3d4f66f311749 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -8,6 +8,7 @@ #include "MIRVRegNamerUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineStableHash.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" @@ -15,6 +16,11 @@ using namespace llvm; #define DEBUG_TYPE "mir-vregnamer-utils" +static cl::opt + UseStableNamerHash("mir-vreg-namer-use-stable-hash", cl::init(false), + cl::Hidden, + cl::desc("Use Stable Hashing for MIR VReg Renaming")); + using VRegRenameMap = std::map; bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) { @@ -52,6 +58,14 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { std::string S; raw_string_ostream OS(S); + if (UseStableNamerHash) { + auto Hash = stableHashValue(MI, /* HashVRegs */ true, + /* HashConstantPoolIndices */ true, + /* HashMemOperands */ true); + assert(Hash && "Expected non-zero Hash"); + return std::to_string(Hash).substr(0, 5); + } + // Gets a hashable artifact from a given MachineOperand (ie an unsigned). auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned { switch (MO.getType()) { diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index cece914d2eb17..ce33cdb28b1e4 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -415,6 +415,11 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) { return nullptr; } +const char *MachineOperand::getTargetIndexName() const { + const MachineFunction *MF = getMFIfAvailable(*this); + return MF ? 
::getTargetIndexName(*MF, this->getIndex()) : nullptr; +} + static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) { auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); for (const auto &I : Flags) { @@ -823,7 +828,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "target-index("; const char *Name = ""; if (const MachineFunction *MF = getMFIfAvailable(*this)) - if (const auto *TargetIndexName = getTargetIndexName(*MF, getIndex())) + if (const auto *TargetIndexName = ::getTargetIndexName(*MF, getIndex())) Name = TargetIndexName; OS << Name << ')'; printOperandOffset(OS, getOffset()); diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp new file mode 100644 index 0000000000000..7106d4172ce0d --- /dev/null +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -0,0 +1,193 @@ +//===- lib/CodeGen/MachineStableHash.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Stable hashing for MachineInstr and MachineOperand. Useful or getting a +// hash across runs, modules, etc. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineStableHash.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/MIRFormatter.h" +#include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StableHashing.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "machine-stable-hash" + +using namespace llvm; + +STATISTIC(StableHashBailingMachineBasicBlock, + "Number of encountered unsupported MachineOperands that were " + "MachineBasicBlocks while computing stable hashes"); +STATISTIC(StableHashBailingConstantPoolIndex, + "Number of encountered unsupported MachineOperands that were " + "ConstantPoolIndex while computing stable hashes"); +STATISTIC(StableHashBailingTargetIndexNoName, + "Number of encountered unsupported MachineOperands that were " + "TargetIndex with no name"); +STATISTIC(StableHashBailingGlobalAddress, + "Number of encountered unsupported MachineOperands that were " + "GlobalAddress while computing stable hashes"); +STATISTIC(StableHashBailingBlockAddress, + "Number of encountered unsupported MachineOperands that were " + "BlockAddress while computing stable hashes"); +STATISTIC(StableHashBailingMetadataUnsupported, + "Number of encountered unsupported MachineOperands that were " + "Metadata of an 
unsupported kind while computing stable hashes"); + +stable_hash llvm::stableHashValue(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (Register::isVirtualRegister(MO.getReg())) { + const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo(); + return MRI.getVRegDef(MO.getReg())->getOpcode(); + } + + // Register operands don't have target flags. + return stable_hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), + MO.isDef()); + case MachineOperand::MO_Immediate: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + case MachineOperand::MO_CImmediate: + case MachineOperand::MO_FPImmediate: { + auto Val = MO.isCImm() ? MO.getCImm()->getValue() + : MO.getFPImm()->getValueAPF().bitcastToAPInt(); + auto ValHash = + stable_hash_combine_array(Val.getRawData(), Val.getNumWords()); + return hash_combine(MO.getType(), MO.getTargetFlags(), ValHash); + } + + case MachineOperand::MO_MachineBasicBlock: + StableHashBailingMachineBasicBlock++; + return 0; + case MachineOperand::MO_ConstantPoolIndex: + StableHashBailingConstantPoolIndex++; + return 0; + case MachineOperand::MO_BlockAddress: + StableHashBailingBlockAddress++; + return 0; + case MachineOperand::MO_Metadata: + StableHashBailingMetadataUnsupported++; + return 0; + case MachineOperand::MO_GlobalAddress: + StableHashBailingGlobalAddress++; + return 0; + case MachineOperand::MO_TargetIndex: { + if (const char *Name = MO.getTargetIndexName()) + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_combine_string(Name), + MO.getOffset()); + StableHashBailingTargetIndexNoName++; + return 0; + } + + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_JumpTableIndex: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getIndex()); + + case MachineOperand::MO_ExternalSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), + 
stable_hash_combine_string(MO.getSymbolName())); + + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + + case MachineOperand::MO_ShuffleMask: { + std::vector ShuffleMaskHashes; + + llvm::transform( + MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes), + [](int S) -> llvm::stable_hash { return llvm::stable_hash(S); }); + + return hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_combine_array(ShuffleMaskHashes.data(), + ShuffleMaskHashes.size())); + } + case MachineOperand::MO_MCSymbol: { + auto SymbolName = MO.getMCSymbol()->getName(); + return hash_combine(MO.getType(), MO.getTargetFlags(), + stable_hash_combine_string(SymbolName)); + } + case MachineOperand::MO_CFIIndex: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getCFIIndex()); + case MachineOperand::MO_IntrinsicID: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getIntrinsicID()); + case MachineOperand::MO_Predicate: + return stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getPredicate()); + } + llvm_unreachable("Invalid machine operand type"); +} + +/// A stable hash value for machine instructions. +/// Returns 0 if no stable hash could be computed. +/// The hashing and equality testing functions ignore definitions so this is +/// useful for CSE, etc. +stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, + bool HashConstantPoolIndices, + bool HashMemOperands) { + // Build up a buffer of hash code components. + SmallVector HashComponents; + HashComponents.reserve(MI.getNumOperands() + MI.getNumMemOperands() + 2); + HashComponents.push_back(MI.getOpcode()); + HashComponents.push_back(MI.getFlags()); + for (const MachineOperand &MO : MI.operands()) { + if (!HashVRegs && MO.isReg() && MO.isDef() && + Register::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. 
+ + if (MO.isCPI()) { + HashComponents.push_back(stable_hash_combine( + MO.getType(), MO.getTargetFlags(), MO.getIndex())); + continue; + } + + stable_hash StableHash = stableHashValue(MO); + if (!StableHash) + return 0; + HashComponents.push_back(StableHash); + } + + for (const auto *Op : MI.memoperands()) { + if (!HashMemOperands) + break; + HashComponents.push_back(static_cast(Op->getSize())); + HashComponents.push_back(static_cast(Op->getFlags())); + HashComponents.push_back(static_cast(Op->getOffset())); + HashComponents.push_back(static_cast(Op->getOrdering())); + HashComponents.push_back(static_cast(Op->getAddrSpace())); + HashComponents.push_back(static_cast(Op->getSyncScopeID())); + HashComponents.push_back(static_cast(Op->getBaseAlign().value())); + HashComponents.push_back(static_cast(Op->getFailureOrdering())); + } + + return stable_hash_combine_range(HashComponents.begin(), + HashComponents.end()); +} diff --git a/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir b/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir index bc5c06d7f674b..e84ed9c368906 100644 --- a/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir @@ -1,5 +1,6 @@ -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass none -o - %s | FileCheck %s -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass mir-canonicalizer -o - %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass none -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass mir-canonicalizer -verify-machineinstrs -o - %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - %s --- | diff --git a/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir b/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir index 7ece521bedbfd..78ed554687fa2 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir 
+++ b/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir @@ -1,4 +1,5 @@ # RUN: llc -o - -run-pass mir-canonicalizer -verify-machineinstrs %s | FileCheck %s +# RUN: llc -o - -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs %s | FileCheck %s --- | target triple = "aarch64-unknown-unknown" define void @f() { unreachable } diff --git a/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir b/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir index dbb6b62b68bb7..6d3124c61db1b 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir @@ -1,4 +1,5 @@ # RUN: llc -run-pass mir-canonicalizer -verify-machineinstrs -mtriple aarch64-unknown-linux-gnu -o - %s | FileCheck %s +# RUN: llc -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -mtriple aarch64-unknown-linux-gnu -o - %s | FileCheck %s ... --- name: foo diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir index e95b838030c74..4b03f42083dd7 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -verify-machineinstrs -run-pass mir-canonicalizer %s | FileCheck %s +# RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -verify-machineinstrs -mir-vreg-namer-use-stable-hash -run-pass mir-canonicalizer %s | FileCheck %s ... 
--- diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir index 74eae56f9cb66..924a34d5ce43d 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -verify-machineinstrs -run-pass mir-canonicalizer %s | FileCheck %s +# RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -mir-vreg-namer-use-stable-hash -verify-machineinstrs -run-pass mir-canonicalizer %s | FileCheck %s # These Idempotent instructions are sorted alphabetically (based on after the '=') # CHECK: %bb0_{{[0-9]+}}__1:gpr64 = MOVi64imm 4617315517961601024 # CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 408 diff --git a/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir b/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir index 5d63f4107b921..ef4939e47136a 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir @@ -1,4 +1,4 @@ -# RUN: llc -x mir -mtriple aarch64-apple-ios -run-pass mir-namer -verify-machineinstrs -o - < %s | FileCheck %s +# RUN: llc -x mir -mtriple aarch64-apple-ios -run-pass mir-namer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - < %s | FileCheck %s --- name: foo diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir index 295ab75b6d825..6bde24eb33192 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir @@ -1,4 +1,5 @@ -# RUN: llc -o - -march=amdgcn -run-pass mir-canonicalizer %s | FileCheck %s +# RUN: llc -o - -march=amdgcn -run-pass mir-canonicalizer -verify-machineinstrs %s | FileCheck %s +# RUN: llc -o - -march=amdgcn -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs %s | FileCheck %s # This tests for the itereator invalidation fix (reviews.llvm.org/D62713) ... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir index ea2f7de26875b..31f7d7b1b6a40 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir @@ -1,4 +1,5 @@ -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass mir-canonicalizer -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass mir-canonicalizer -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - %s | FileCheck %s --- | target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" diff --git a/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir b/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir index 8bed8fe6af167..80230a59928db 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir @@ -1,5 +1,6 @@ # RUN: llc -march=amdgcn -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -run-pass mir-canonicalizer -verify-machineinstrs -o - %s +# RUN: llc -march=amdgcn -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - %s # Previously getReservedRegs was called before parsing # machineFunctionInfo, but the AMDGPU implementation depends on diff --git a/llvm/test/CodeGen/MIR/Generic/CFPImmMIRCanonHash.mir b/llvm/test/CodeGen/MIR/Generic/CFPImmMIRCanonHash.mir index 2202c74d643dd..91301af0f314b 100644 --- a/llvm/test/CodeGen/MIR/Generic/CFPImmMIRCanonHash.mir +++ b/llvm/test/CodeGen/MIR/Generic/CFPImmMIRCanonHash.mir @@ -1,4 +1,5 @@ -# RUN: llc -run-pass mir-canonicalizer -o - %s | FileCheck %s +# RUN: llc -run-pass mir-canonicalizer -verify-machineinstrs -o - %s | FileCheck %s 
+# RUN: llc -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - %s | FileCheck %s --- name: cimm_fpimm_hash_test body: | diff --git a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir index 94c69f1be36a6..a0a2f9e378efa 100644 --- a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir +++ b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir @@ -1,5 +1,7 @@ # RUN: llc -run-pass mir-namer -x mir -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -run-pass mir-canonicalizer -x mir -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -run-pass mir-namer -mir-vreg-namer-use-stable-hash -x mir -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -x mir -verify-machineinstrs %s -o - | FileCheck %s --- | target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/MIR/X86/mir-namer-hash-frameindex.mir b/llvm/test/CodeGen/MIR/X86/mir-namer-hash-frameindex.mir index 68158563a6de7..23d5c2e7b60a2 100644 --- a/llvm/test/CodeGen/MIR/X86/mir-namer-hash-frameindex.mir +++ b/llvm/test/CodeGen/MIR/X86/mir-namer-hash-frameindex.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple x86_64-linux-gnu -run-pass mir-canonicalizer -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple x86_64-linux-gnu -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs %s -o - | FileCheck %s ... 
--- diff --git a/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir b/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir index d3c797ba8df57..bc5991ea41b5f 100644 --- a/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir +++ b/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir @@ -1,4 +1,5 @@ -# RUN: llc -march=x86-64 -run-pass mir-canonicalizer -o - %s | FileCheck %s +# RUN: llc -march=x86-64 -run-pass mir-canonicalizer -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=x86-64 -run-pass mir-canonicalizer -mir-vreg-namer-use-stable-hash -verify-machineinstrs -o - %s | FileCheck %s # The purpose of this test is to ensure that differing flags do in-fact cause # naming collisions with the new vreg renamers naming scheme. --- | From 952dc7f288a271e53bae6fa082ad045ade39ffb3 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 3 Sep 2020 20:13:38 +0000 Subject: [PATCH 148/465] [gn build] Port 7fff1fbd3ce --- llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index 0a45a48d9dcaf..e2f6c710496ec 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -128,6 +128,7 @@ static_library("CodeGen") { "MachineScheduler.cpp", "MachineSink.cpp", "MachineSizeOpts.cpp", + "MachineStableHash.cpp", "MachineStripDebug.cpp", "MachineTraceMetrics.cpp", "MachineVerifier.cpp", From 5cda4dc7b4d28fcd11307d4234c513ff779a1c6f Mon Sep 17 00:00:00 2001 From: Dokyung Song Date: Mon, 17 Aug 2020 16:59:59 +0000 Subject: [PATCH 149/465] [libFuzzer] Scale energy assigned to each input based on input execution time. This patch scales the energy computed by the Entropic schedule based on the execution time of each input. 
The input execution time is compared with the average execution time of inputs in the corpus, and, based on the amount by which they differ, the energy is scaled from 0.1x (for inputs executing slow) to 3x (for inputs executing fast). Note that the exact scaling criteria and formula is borrowed from AFL. On FuzzBench, this gives a sizeable throughput increase, which in turn leads to more coverage on several benchmarks. For details, see the following report. https://storage.googleapis.com/fuzzer-test-suite-public/exectime-report/index.html Differential Revision: https://reviews.llvm.org/D86092 --- compiler-rt/lib/fuzzer/FuzzerCorpus.h | 49 ++++++++++++++++--- compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 2 + compiler-rt/lib/fuzzer/FuzzerFlags.def | 5 ++ compiler-rt/lib/fuzzer/FuzzerLoop.cpp | 3 +- compiler-rt/lib/fuzzer/FuzzerOptions.h | 1 + .../lib/fuzzer/tests/FuzzerUnittest.cpp | 10 ++-- .../fuzzer/EntropicScalePerExecTimeTest.cpp | 33 +++++++++++++ .../fuzzer/entropic-scale-per-exec-time.test | 8 +++ 8 files changed, 100 insertions(+), 11 deletions(-) create mode 100644 compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp create mode 100644 compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test diff --git a/compiler-rt/lib/fuzzer/FuzzerCorpus.h b/compiler-rt/lib/fuzzer/FuzzerCorpus.h index 6b45d37a4b140..daea4f5213b18 100644 --- a/compiler-rt/lib/fuzzer/FuzzerCorpus.h +++ b/compiler-rt/lib/fuzzer/FuzzerCorpus.h @@ -18,6 +18,7 @@ #include "FuzzerSHA1.h" #include "FuzzerTracePC.h" #include +#include #include #include #include @@ -26,6 +27,7 @@ namespace fuzzer { struct InputInfo { Unit U; // The actual input data. + std::chrono::microseconds TimeOfUnit; uint8_t Sha1[kSHA1NumBytes]; // Checksum. // Number of features that this input has and no smaller input has. 
size_t NumFeatures = 0; @@ -62,11 +64,15 @@ struct InputInfo { } // Assign more energy to a high-entropy seed, i.e., that reveals more - // information about the globally rare features in the neighborhood - // of the seed. Since we do not know the entropy of a seed that has - // never been executed we assign fresh seeds maximum entropy and - // let II->Energy approach the true entropy from above. - void UpdateEnergy(size_t GlobalNumberOfFeatures) { + // information about the globally rare features in the neighborhood of the + // seed. Since we do not know the entropy of a seed that has never been + // executed we assign fresh seeds maximum entropy and let II->Energy approach + // the true entropy from above. If ScalePerExecTime is true, the computed + // entropy is scaled based on how fast this input executes compared to the + // average execution time of inputs. The faster an input executes, the more + // energy gets assigned to the input. + void UpdateEnergy(size_t GlobalNumberOfFeatures, bool ScalePerExecTime, + std::chrono::microseconds AverageUnitExecutionTime) { Energy = 0.0; SumIncidence = 0; @@ -89,6 +95,27 @@ struct InputInfo { // Normalize. if (SumIncidence != 0) Energy = (Energy / SumIncidence) + logl(SumIncidence); + + if (ScalePerExecTime) { + // Scaling to favor inputs with lower execution time. 
+ uint32_t PerfScore = 100; + if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10) + PerfScore = 10; + else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4) + PerfScore = 25; + else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2) + PerfScore = 50; + else if (TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4) + PerfScore = 75; + else if (TimeOfUnit.count() * 4 < AverageUnitExecutionTime.count()) + PerfScore = 300; + else if (TimeOfUnit.count() * 3 < AverageUnitExecutionTime.count()) + PerfScore = 200; + else if (TimeOfUnit.count() * 2 < AverageUnitExecutionTime.count()) + PerfScore = 150; + + Energy *= PerfScore; + } } // Increment the frequency of the feature Idx. @@ -121,6 +148,7 @@ struct EntropicOptions { bool Enabled; size_t NumberOfRarestFeatures; size_t FeatureFrequencyThreshold; + bool ScalePerExecTime; }; class InputCorpus { @@ -179,6 +207,7 @@ class InputCorpus { const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, bool HasFocusFunction, bool NeverReduce, + std::chrono::microseconds TimeOfUnit, const Vector &FeatureSet, const DataFlowTrace &DFT, const InputInfo *BaseII) { assert(!U.empty()); @@ -189,6 +218,7 @@ class InputCorpus { II.U = U; II.NumFeatures = NumFeatures; II.NeverReduce = NeverReduce; + II.TimeOfUnit = TimeOfUnit; II.MayDeleteFile = MayDeleteFile; II.UniqFeatureSet = FeatureSet; II.HasFocusFunction = HasFocusFunction; @@ -471,12 +501,19 @@ class InputCorpus { Weights.resize(N); std::iota(Intervals.begin(), Intervals.end(), 0); + std::chrono::microseconds AverageUnitExecutionTime(0); + for (auto II : Inputs) { + AverageUnitExecutionTime += II->TimeOfUnit; + } + AverageUnitExecutionTime /= N; + bool VanillaSchedule = true; if (Entropic.Enabled) { for (auto II : Inputs) { if (II->NeedsEnergyUpdate && II->Energy != 0.0) { II->NeedsEnergyUpdate = false; - II->UpdateEnergy(RareFeatures.size()); + 
II->UpdateEnergy(RareFeatures.size(), Entropic.ScalePerExecTime, + AverageUnitExecutionTime); } } diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index 93b1ff658e42e..caafd1dbb0a7b 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -764,6 +764,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { (size_t)Flags.entropic_feature_frequency_threshold; Options.EntropicNumberOfRarestFeatures = (size_t)Flags.entropic_number_of_rarest_features; + Options.EntropicScalePerExecTime = Flags.entropic_scale_per_exec_time; if (Options.Entropic) { if (!Options.FocusFunction.empty()) { Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot " @@ -779,6 +780,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Entropic.FeatureFrequencyThreshold = Options.EntropicFeatureFrequencyThreshold; Entropic.NumberOfRarestFeatures = Options.EntropicNumberOfRarestFeatures; + Entropic.ScalePerExecTime = Options.EntropicScalePerExecTime; unsigned Seed = Flags.seed; // Initialize Seed. diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def index 5194666634b43..fdb8362cef9d4 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -175,6 +175,11 @@ FUZZER_FLAG_INT(entropic_number_of_rarest_features, 100, "Experimental. If " "entropic is enabled, we keep track of the frequencies only for the " "Top-X least abundant features (union features that are considered as " "rare).") +FUZZER_FLAG_INT(entropic_scale_per_exec_time, 0, "Experimental. If 1, " + "the Entropic power schedule gets scaled based on the input execution " + "time. Inputs with lower execution time get scheduled more (up to 30x). 
" + "Note that, if 1, fuzzer stops from being deterministic even if a " + "non-zero random seed is given.") FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") FUZZER_DEPRECATED_FLAG(use_clang_coverage) diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index f97b4d2f7f015..f9986dd8eea51 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -470,6 +470,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, return false; ExecuteCallback(Data, Size); + auto TimeOfUnit = duration_cast(UnitStopTime - UnitStartTime); UniqFeatureSetTmp.clear(); size_t FoundUniqFeaturesOfII = 0; @@ -493,7 +494,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, auto NewII = Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile, TPC.ObservedFocusFunction(), ForceAddToCorpus, - UniqFeatureSetTmp, DFT, II); + TimeOfUnit, UniqFeatureSetTmp, DFT, II); WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1), NewII->UniqFeatureSet); return true; diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h index 281658dadf04b..b17a7474d38f0 100644 --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -49,6 +49,7 @@ struct FuzzingOptions { bool Entropic = false; size_t EntropicFeatureFrequencyThreshold = 0xFF; size_t EntropicNumberOfRarestFeatures = 100; + bool EntropicScalePerExecTime = false; std::string OutputCorpus; std::string ArtifactPrefix = "./"; std::string ExactArtifactPath; diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp index 93b54f54c19aa..e17fca8fe9ed6 100644 --- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -599,7 +599,9 @@ TEST(Corpus, Distribution) { for (size_t i = 0; i < N; i++) C->AddToCorpus(Unit{static_cast(i)}, 
/*NumFeatures*/ 1, /*MayDeleteFile*/ false, /*HasFocusFunction*/ false, - /*ForceAddToCorpus*/ false, /*FeatureSet*/ {}, DFT, + /*ForceAddToCorpus*/ false, + /*TimeOfUnit*/ std::chrono::microseconds(0), + /*FeatureSet*/ {}, DFT, /*BaseII*/ nullptr); Vector Hist(N); @@ -1101,17 +1103,17 @@ TEST(Entropic, ComputeEnergy) { Vector> FeatureFreqs = {{1, 3}, {2, 3}, {3, 3}}; II->FeatureFreqs = FeatureFreqs; II->NumExecutedMutations = 0; - II->UpdateEnergy(4); + II->UpdateEnergy(4, false, std::chrono::microseconds(0)); EXPECT_LT(SubAndSquare(II->Energy, 1.450805), Precision); II->NumExecutedMutations = 9; - II->UpdateEnergy(5); + II->UpdateEnergy(5, false, std::chrono::microseconds(0)); EXPECT_LT(SubAndSquare(II->Energy, 1.525496), Precision); II->FeatureFreqs[0].second++; II->FeatureFreqs.push_back(std::pair(42, 6)); II->NumExecutedMutations = 20; - II->UpdateEnergy(10); + II->UpdateEnergy(10, false, std::chrono::microseconds(0)); EXPECT_LT(SubAndSquare(II->Energy, 1.792831), Precision); } diff --git a/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp b/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp new file mode 100644 index 0000000000000..ec3a6906d3886 --- /dev/null +++ b/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp @@ -0,0 +1,33 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// Tests whether scaling the Entropic scheduling weight based on input execution +// time is effective or not. Inputs of size 10 will take at least 100 +// microseconds more than any input of size 1-9. The input of size 2 in the +// corpus should be favored by the exec-time-scaled Entropic scheduling policy +// than the input of size 10 in the corpus, eventually finding the crashing +// input {0xab, 0xcd} with less executions. 
+#include +#include +#include + +static volatile int Sink; +static volatile int *Nil = nullptr; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 10) + return 0; // To make the test quicker. + + if (Size == 10) { + size_t ExecTimeUSec = 100; + std::this_thread::sleep_for(std::chrono::microseconds(ExecTimeUSec)); + + Sink = 0; // execute a lot slower than the crashing input below. + } + + if (Size == 2 && Data[0] == 0xab && Data[1] == 0xcd) + *Nil = 42; // crash. + + return 0; +} diff --git a/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test b/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test new file mode 100644 index 0000000000000..d34550f9c951f --- /dev/null +++ b/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test @@ -0,0 +1,8 @@ +REQUIRES: linux, x86_64 +RUN: %cpp_compiler %S/EntropicScalePerExecTimeTest.cpp -o %t-EntropicScalePerExecTimeTest +RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -entropic_scale_per_exec_time=1 -seed=1 -runs=100000 -max_len=10 + +# The following test is added as a comment here for reference, which should +# take more runs than with -entropic_scale_per_exec_time=1 to find the crash. +# (it takes 126,633 runs) +# RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -seed=1 -runs=200000 -max_len=10 From 0f1be87e294751a0941f1d9b7785ebf4d8072149 Mon Sep 17 00:00:00 2001 From: Erik Pilkington Date: Thu, 3 Sep 2020 12:41:34 -0400 Subject: [PATCH 150/465] [Sema] Fix a -Warc-repeated-use-of-weak false-positive by only calling CheckPlaceholderExpr once Previously, this code discarded the result of CheckPlaceholderExpr for non-matrix subexpressions. Not only is this wasteful, but it was creating a Warc-repeated-use-of-weak false-positive on the attached testcase, since the discarded expression was still registered as a use of the weak property. 
rdar://66162246 Differential revision: https://reviews.llvm.org/D87102 --- clang/lib/Sema/SemaExpr.cpp | 45 +++++++++++------------- clang/test/SemaObjC/arc-repeated-weak.mm | 14 ++++++++ 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 450185788537c..cd71ce70c70ef 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -4595,8 +4595,8 @@ Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, << SourceRange(base->getBeginLoc(), rbLoc); return ExprError(); } - // If the base is either a MatrixSubscriptExpr or a matrix type, try to create - // a new MatrixSubscriptExpr. + // If the base is a MatrixSubscriptExpr, try to create a new + // MatrixSubscriptExpr. auto *matSubscriptE = dyn_cast(base); if (matSubscriptE) { if (CheckAndReportCommaError(idx)) @@ -4607,34 +4607,13 @@ Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, return CreateBuiltinMatrixSubscriptExpr( matSubscriptE->getBase(), matSubscriptE->getRowIdx(), idx, rbLoc); } - Expr *matrixBase = base; - bool IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base); - if (!IsMSPropertySubscript) { - ExprResult result = CheckPlaceholderExpr(base); - if (!result.isInvalid()) - matrixBase = result.get(); - } - if (matrixBase->getType()->isMatrixType()) { - if (CheckAndReportCommaError(idx)) - return ExprError(); - - return CreateBuiltinMatrixSubscriptExpr(matrixBase, idx, nullptr, rbLoc); - } - - // A comma-expression as the index is deprecated in C++2a onwards. - if (getLangOpts().CPlusPlus20 && - ((isa(idx) && cast(idx)->isCommaOp()) || - (isa(idx) && - cast(idx)->getOperator() == OO_Comma))) { - Diag(idx->getExprLoc(), diag::warn_deprecated_comma_subscript) - << SourceRange(base->getBeginLoc(), rbLoc); - } // Handle any non-overload placeholder types in the base and index // expressions. 
We can't handle overloads here because the other // operand might be an overloadable type, in which case the overload // resolution for the operator overload should get the first crack // at the overload. + bool IsMSPropertySubscript = false; if (base->getType()->isNonOverloadPlaceholderType()) { IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base); if (!IsMSPropertySubscript) { @@ -4644,6 +4623,24 @@ Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation lbLoc, base = result.get(); } } + + // If the base is a matrix type, try to create a new MatrixSubscriptExpr. + if (base->getType()->isMatrixType()) { + if (CheckAndReportCommaError(idx)) + return ExprError(); + + return CreateBuiltinMatrixSubscriptExpr(base, idx, nullptr, rbLoc); + } + + // A comma-expression as the index is deprecated in C++2a onwards. + if (getLangOpts().CPlusPlus20 && + ((isa(idx) && cast(idx)->isCommaOp()) || + (isa(idx) && + cast(idx)->getOperator() == OO_Comma))) { + Diag(idx->getExprLoc(), diag::warn_deprecated_comma_subscript) + << SourceRange(base->getBeginLoc(), rbLoc); + } + if (idx->getType()->isNonOverloadPlaceholderType()) { ExprResult result = CheckPlaceholderExpr(idx); if (result.isInvalid()) return ExprError(); diff --git a/clang/test/SemaObjC/arc-repeated-weak.mm b/clang/test/SemaObjC/arc-repeated-weak.mm index 4eec4d2fe69c7..90388598c7b8d 100644 --- a/clang/test/SemaObjC/arc-repeated-weak.mm +++ b/clang/test/SemaObjC/arc-repeated-weak.mm @@ -485,3 +485,17 @@ void foo1() { @class NSString; static NSString* const kGlobal = @""; + +@interface NSDictionary +- (id)objectForKeyedSubscript:(id)key; +@end + +@interface WeakProp +@property (weak) NSDictionary *nd; +@end + +@implementation WeakProp +-(void)m { + (void)self.nd[@""]; // no warning +} +@end From 8b8be6f38ab568d40869205389a002f32f6558a2 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 1 Sep 2020 00:54:46 -0700 Subject: [PATCH 151/465] [Asan] Cleanup atomic usage in allocator There are no known bugs
related to this, still it may fix some latent ones. Main concerns with preexisting code: 1. Inconsistent atomic/non-atomic access to the same field. 2. Assumption that bitfield chunk_state is always the first byte without even taking into account endianness. Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D86917 --- compiler-rt/lib/asan/asan_allocator.cpp | 109 ++++++++++++++---------- 1 file changed, 66 insertions(+), 43 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 039605d5ca21e..448dece0a1703 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -72,7 +72,7 @@ static const uptr kAllocBegMagic = 0xCC6E96B9; struct ChunkHeader { // 1-st 8 bytes. - u32 chunk_state : 8; // Must be first. + atomic_uint8_t chunk_state; u32 alloc_tid : 24; u32 free_tid : 24; @@ -88,7 +88,7 @@ struct ChunkHeader { // align < 8 -> 0 // else -> log2(min(align, 512)) - 2 u32 user_requested_alignment_log : 3; - u32 alloc_context_id; + atomic_uint32_t alloc_context_id; }; struct ChunkBase : ChunkHeader { @@ -140,8 +140,13 @@ struct QuarantineCallback { } void Recycle(AsanChunk *m) { - CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); - atomic_store((atomic_uint8_t*)m, CHUNK_AVAILABLE, memory_order_relaxed); + u8 old_chunk_state = CHUNK_QUARANTINE; + if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, + CHUNK_AVAILABLE, + memory_order_acquire)) { + CHECK_EQ(old_chunk_state, CHUNK_QUARANTINE); + } + CHECK_NE(m->alloc_tid, kInvalidTid); CHECK_NE(m->free_tid, kInvalidTid); PoisonShadow(m->Beg(), @@ -301,22 +306,25 @@ struct Allocator { // housekeeping chunk, like TransferBatch. Start by assuming the former. 
AsanChunk *ac = GetAsanChunk((void *)chunk); uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)ac); - uptr beg = ac->Beg(); - uptr end = ac->Beg() + ac->UsedSize(true); - uptr chunk_end = chunk + allocated_size; - if (chunk < beg && beg < end && end <= chunk_end && - ac->chunk_state == CHUNK_ALLOCATED) { - // Looks like a valid AsanChunk in use, poison redzones only. - PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); - uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); - FastPoisonShadowPartialRightRedzone( - end_aligned_down, end - end_aligned_down, - chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); - } else { - // This is either not an AsanChunk or freed or quarantined AsanChunk. - // In either case, poison everything. - PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); + if (atomic_load(&ac->chunk_state, memory_order_acquire) == + CHUNK_ALLOCATED) { + uptr beg = ac->Beg(); + uptr end = ac->Beg() + ac->UsedSize(true); + uptr chunk_end = chunk + allocated_size; + if (chunk < beg && beg < end && end <= chunk_end) { + // Looks like a valid AsanChunk in use, poison redzones only. + PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); + uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); + FastPoisonShadowPartialRightRedzone( + end_aligned_down, end - end_aligned_down, + chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); + return; + } } + + // This is either not an AsanChunk or freed or quarantined AsanChunk. + // In either case, poison everything. + PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); } void ReInitialize(const AllocatorOptions &options) { @@ -381,14 +389,17 @@ struct Allocator { AsanChunk *right_chunk) { // Prefer an allocated chunk over freed chunk and freed chunk // over available chunk. 
- if (left_chunk->chunk_state != right_chunk->chunk_state) { - if (left_chunk->chunk_state == CHUNK_ALLOCATED) + u8 left_state = atomic_load(&left_chunk->chunk_state, memory_order_relaxed); + u8 right_state = + atomic_load(&right_chunk->chunk_state, memory_order_relaxed); + if (left_state != right_state) { + if (left_state == CHUNK_ALLOCATED) return left_chunk; - if (right_chunk->chunk_state == CHUNK_ALLOCATED) + if (right_state == CHUNK_ALLOCATED) return right_chunk; - if (left_chunk->chunk_state == CHUNK_QUARANTINE) + if (left_state == CHUNK_QUARANTINE) return left_chunk; - if (right_chunk->chunk_state == CHUNK_QUARANTINE) + if (right_state == CHUNK_QUARANTINE) return right_chunk; } // Same chunk_state: choose based on offset. @@ -403,9 +414,10 @@ struct Allocator { bool UpdateAllocationStack(uptr addr, BufferedStackTrace *stack) { AsanChunk *m = GetAsanChunkByAddr(addr); if (!m) return false; - if (m->chunk_state != CHUNK_ALLOCATED) return false; + if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) + return false; if (m->Beg() != addr) return false; - atomic_store((atomic_uint32_t *)&m->alloc_context_id, StackDepotPut(*stack), + atomic_store(&m->alloc_context_id, StackDepotPut(*stack), memory_order_relaxed); return true; } @@ -524,7 +536,8 @@ struct Allocator { } m->user_requested_alignment_log = user_requested_alignment_log; - m->alloc_context_id = StackDepotPut(*stack); + atomic_store(&m->alloc_context_id, StackDepotPut(*stack), + memory_order_relaxed); uptr size_rounded_down_to_granularity = RoundDownTo(size, SHADOW_GRANULARITY); @@ -557,7 +570,7 @@ struct Allocator { : __lsan::kDirectlyLeaked; #endif // Must be the last mutation of metadata in this function. 
- atomic_store((atomic_uint8_t *)m, CHUNK_ALLOCATED, memory_order_release); + atomic_store(&m->chunk_state, CHUNK_ALLOCATED, memory_order_release); ASAN_MALLOC_HOOK(res, size); return res; } @@ -565,10 +578,10 @@ struct Allocator { // Set quarantine flag if chunk is allocated, issue ASan error report on // available and quarantined chunks. Return true on success, false otherwise. bool AtomicallySetQuarantineFlagIfAllocated(AsanChunk *m, void *ptr, - BufferedStackTrace *stack) { + BufferedStackTrace *stack) { u8 old_chunk_state = CHUNK_ALLOCATED; // Flip the chunk_state atomically to avoid race on double-free. - if (!atomic_compare_exchange_strong((atomic_uint8_t *)m, &old_chunk_state, + if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, CHUNK_QUARANTINE, memory_order_acquire)) { ReportInvalidFree(ptr, old_chunk_state, stack); @@ -582,7 +595,8 @@ struct Allocator { // Expects the chunk to already be marked as quarantined by using // AtomicallySetQuarantineFlagIfAllocated. void QuarantineChunk(AsanChunk *m, void *ptr, BufferedStackTrace *stack) { - CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); + CHECK_EQ(atomic_load(&m->chunk_state, memory_order_relaxed), + CHUNK_QUARANTINE); CHECK_GE(m->alloc_tid, 0); if (SANITIZER_WORDSIZE == 64) // On 32-bits this resides in user area. 
CHECK_EQ(m->free_tid, kInvalidTid); @@ -677,7 +691,7 @@ struct Allocator { void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC, true); if (new_ptr) { - u8 chunk_state = m->chunk_state; + u8 chunk_state = atomic_load(&m->chunk_state, memory_order_acquire); if (chunk_state != CHUNK_ALLOCATED) ReportInvalidFree(old_ptr, chunk_state, stack); CHECK_NE(REAL(memcpy), nullptr); @@ -774,7 +788,8 @@ struct Allocator { uptr AllocationSize(uptr p) { AsanChunk *m = GetAsanChunkByAddr(p); if (!m) return 0; - if (m->chunk_state != CHUNK_ALLOCATED) return 0; + if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) + return 0; if (m->Beg() != p) return 0; return m->UsedSize(); } @@ -840,13 +855,16 @@ static AsanAllocator &get_allocator() { } bool AsanChunkView::IsValid() const { - return chunk_ && chunk_->chunk_state != CHUNK_AVAILABLE; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) != + CHUNK_AVAILABLE; } bool AsanChunkView::IsAllocated() const { - return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == + CHUNK_ALLOCATED; } bool AsanChunkView::IsQuarantined() const { - return chunk_ && chunk_->chunk_state == CHUNK_QUARANTINE; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == + CHUNK_QUARANTINE; } uptr AsanChunkView::Beg() const { return chunk_->Beg(); } uptr AsanChunkView::End() const { return Beg() + UsedSize(); } @@ -867,7 +885,9 @@ static StackTrace GetStackTraceFromId(u32 id) { return res; } -u32 AsanChunkView::GetAllocStackId() const { return chunk_->alloc_context_id; } +u32 AsanChunkView::GetAllocStackId() const { + return atomic_load(&chunk_->alloc_context_id, memory_order_relaxed); +} u32 AsanChunkView::GetFreeStackId() const { return chunk_->free_context_id; } StackTrace AsanChunkView::GetAllocStack() const { @@ -1052,10 +1072,10 @@ void GetAllocatorGlobalRange(uptr *begin, uptr *end) { uptr PointsIntoChunk(void* 
p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(addr); - if (!m) return 0; - uptr chunk = m->Beg(); - if (m->chunk_state != __asan::CHUNK_ALLOCATED) + if (!m || atomic_load(&m->chunk_state, memory_order_acquire) != + __asan::CHUNK_ALLOCATED) return 0; + uptr chunk = m->Beg(); if (m->AddrIsInside(addr, /*locked_version=*/true)) return chunk; if (IsSpecialCaseOfOperatorNew0(chunk, m->UsedSize(/*locked_version*/ true), @@ -1096,7 +1116,8 @@ LsanMetadata::LsanMetadata(uptr chunk) { bool LsanMetadata::allocated() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return m->chunk_state == __asan::CHUNK_ALLOCATED; + return atomic_load(&m->chunk_state, memory_order_relaxed) == + __asan::CHUNK_ALLOCATED; } ChunkTag LsanMetadata::tag() const { @@ -1116,7 +1137,7 @@ uptr LsanMetadata::requested_size() const { u32 LsanMetadata::stack_trace_id() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return m->alloc_context_id; + return atomic_load(&m->alloc_context_id, memory_order_relaxed); } void ForEachChunk(ForEachChunkCallback callback, void *arg) { @@ -1127,7 +1148,9 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddr(addr); if (!m) return kIgnoreObjectInvalid; - if ((m->chunk_state == __asan::CHUNK_ALLOCATED) && m->AddrIsInside(addr)) { + if ((atomic_load(&m->chunk_state, memory_order_acquire) == + __asan::CHUNK_ALLOCATED) && + m->AddrIsInside(addr)) { if (m->lsan_tag == kIgnored) return kIgnoreObjectAlreadyIgnored; m->lsan_tag = __lsan::kIgnored; From 2b71b7f791cd58713df5e32b4b0132c876ed3c5b Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 1 Sep 2020 02:12:15 -0700 Subject: [PATCH 152/465] [NFC][Asan] Set AP32::kMetadataSize to 0 Asan does not use metadata with primary allocators. It should match AP64::kMetadataSize which is 0. Depends on D86917.
Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D86919 --- compiler-rt/lib/asan/asan_allocator.h | 2 +- .../lib/sanitizer_common/sanitizer_allocator_primary32.h | 1 + .../lib/sanitizer_common/sanitizer_allocator_primary64.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index b37d8ef4e8d29..d60b97500a3c3 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -171,7 +171,7 @@ template struct AP32 { static const uptr kSpaceBeg = 0; static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE; - static const uptr kMetadataSize = 16; + static const uptr kMetadataSize = 0; typedef __asan::SizeClassMap SizeClassMap; static const uptr kRegionSizeLog = 20; using AddressSpaceView = AddressSpaceViewTy; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h index 47cc42cb411c2..2c25a687c5f08 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h @@ -153,6 +153,7 @@ class SizeClassAllocator32 { } void *GetMetaData(const void *p) { + CHECK(kMetadataSize); CHECK(PointerIsMine(p)); uptr mem = reinterpret_cast(p); uptr beg = ComputeRegionBeg(mem); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h index acc61cc6ba8dd..7af469c56fd6a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h @@ -230,6 +230,7 @@ class SizeClassAllocator64 { static uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); } void *GetMetaData(const void *p) { + CHECK(kMetadataSize); uptr class_id = GetSizeClass(p); uptr size = ClassIdToSize(class_id); uptr chunk_idx = 
GetChunkIdx(reinterpret_cast(p), size); From 3d54976a704327aea8de85f7b2c36172e42100d9 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 2 Sep 2020 18:20:59 -0700 Subject: [PATCH 153/465] [llvm-symbolizer] Add back --use-symbol-table=true It is used by clusterfuzz (https://github.com/google/clusterfuzz/pull/2009/) and having this compatibility option for a while can help them do bisection with the latest llvm-symbolizer. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D87067 --- llvm/test/tools/llvm-symbolizer/use-symbol-table.s | 7 +++++++ llvm/tools/llvm-symbolizer/Opts.td | 2 ++ 2 files changed, 9 insertions(+) create mode 100644 llvm/test/tools/llvm-symbolizer/use-symbol-table.s diff --git a/llvm/test/tools/llvm-symbolizer/use-symbol-table.s b/llvm/test/tools/llvm-symbolizer/use-symbol-table.s new file mode 100644 index 0000000000000..aed7d43d33916 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/use-symbol-table.s @@ -0,0 +1,7 @@ +# REQUIRES: x86-registered-target + +# RUN: llvm-mc -filetype=obj -triple=x86_64 -g %s -o %t.o + +## --use-symbol-table=true is used by old asan_symbolize.py and Android ndk +## ndk-stack.py. Keep it as a no-op compatibility option for a while. +# RUN: llvm-symbolizer --use-symbol-table=true %t.o diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td index ea28d98cbe275..e8def4ff9a6a4 100644 --- a/llvm/tools/llvm-symbolizer/Opts.td +++ b/llvm/tools/llvm-symbolizer/Opts.td @@ -67,3 +67,5 @@ def : Flag<["--"], "inlining=false">, Alias, HelpText<"Alias for --n // Compatibility aliases for pprof's symbolizer. def : Flag<["-"], "demangle=true">, Alias, HelpText<"Alias for --demangle">; def : Flag<["-"], "demangle=false">, Alias, HelpText<"Alias for --no-demangle">; +// Compatibility no-op options.
+def : Flag<["--"], "use-symbol-table=true">; From e6393ee813178e9d3306b8e3c6949a4f32f8a2cb Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 2 Sep 2020 14:42:37 -0700 Subject: [PATCH 154/465] Canonicalize declaration pointers when forming APValues. References to different declarations of the same entity aren't different values, so shouldn't have different representations. --- clang/include/clang/AST/APValue.h | 4 +-- clang/lib/AST/APValue.cpp | 26 +++++++++++++------ clang/lib/AST/ExprConstant.cpp | 18 +++++-------- .../CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp | 3 +-- clang/test/OpenMP/ordered_messages.cpp | 5 +++- 5 files changed, 31 insertions(+), 25 deletions(-) diff --git a/clang/include/clang/AST/APValue.h b/clang/include/clang/AST/APValue.h index 87e4bd7f84c11..485e6c2602cff 100644 --- a/clang/include/clang/AST/APValue.h +++ b/clang/include/clang/AST/APValue.h @@ -174,6 +174,7 @@ class APValue { return !(LHS == RHS); } friend llvm::hash_code hash_value(const LValueBase &Base); + friend struct llvm::DenseMapInfo; private: PtrTy Ptr; @@ -201,8 +202,7 @@ class APValue { public: LValuePathEntry() : Value() {} - LValuePathEntry(BaseOrMemberType BaseOrMember) - : Value{reinterpret_cast(BaseOrMember.getOpaqueValue())} {} + LValuePathEntry(BaseOrMemberType BaseOrMember); static LValuePathEntry ArrayIndex(uint64_t Index) { LValuePathEntry Result; Result.Value = Index; diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 2a8834b4db0cb..7531229654cf4 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -38,7 +38,7 @@ static_assert( "Type is insufficiently aligned"); APValue::LValueBase::LValueBase(const ValueDecl *P, unsigned I, unsigned V) - : Ptr(P), Local{I, V} {} + : Ptr(P ? 
cast(P->getCanonicalDecl()) : nullptr), Local{I, V} {} APValue::LValueBase::LValueBase(const Expr *P, unsigned I, unsigned V) : Ptr(P), Local{I, V} {} @@ -82,13 +82,19 @@ bool operator==(const APValue::LValueBase &LHS, const APValue::LValueBase &RHS) { if (LHS.Ptr != RHS.Ptr) return false; - if (LHS.is()) + if (LHS.is() || LHS.is()) return true; return LHS.Local.CallIndex == RHS.Local.CallIndex && LHS.Local.Version == RHS.Local.Version; } } +APValue::LValuePathEntry::LValuePathEntry(BaseOrMemberType BaseOrMember) { + if (const Decl *D = BaseOrMember.getPointer()) + BaseOrMember.setPointer(D->getCanonicalDecl()); + Value = reinterpret_cast(BaseOrMember.getOpaqueValue()); +} + namespace { struct LVBase { APValue::LValueBase Base; @@ -113,14 +119,16 @@ APValue::LValueBase::operator bool () const { clang::APValue::LValueBase llvm::DenseMapInfo::getEmptyKey() { - return clang::APValue::LValueBase( - DenseMapInfo::getEmptyKey()); + clang::APValue::LValueBase B; + B.Ptr = DenseMapInfo::getEmptyKey(); + return B; } clang::APValue::LValueBase llvm::DenseMapInfo::getTombstoneKey() { - return clang::APValue::LValueBase( - DenseMapInfo::getTombstoneKey()); + clang::APValue::LValueBase B; + B.Ptr = DenseMapInfo::getTombstoneKey(); + return B; } namespace clang { @@ -757,8 +765,10 @@ void APValue::MakeMemberPointer(const ValueDecl *Member, bool IsDerivedMember, assert(isAbsent() && "Bad state change"); MemberPointerData *MPD = new ((void*)(char*)Data.buffer) MemberPointerData; Kind = MemberPointer; - MPD->MemberAndIsDerivedMember.setPointer(Member); + MPD->MemberAndIsDerivedMember.setPointer( + Member ? 
cast(Member->getCanonicalDecl()) : nullptr); MPD->MemberAndIsDerivedMember.setInt(IsDerivedMember); MPD->resizePath(Path.size()); - memcpy(MPD->getPath(), Path.data(), Path.size()*sizeof(const CXXRecordDecl*)); + for (unsigned I = 0; I != Path.size(); ++I) + MPD->getPath()[I] = Path[I]->getCanonicalDecl(); } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e8f132dd48032..8e43b62662eef 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1978,18 +1978,11 @@ static bool HasSameBase(const LValue &A, const LValue &B) { return false; if (A.getLValueBase().getOpaqueValue() != - B.getLValueBase().getOpaqueValue()) { - const Decl *ADecl = GetLValueBaseDecl(A); - if (!ADecl) - return false; - const Decl *BDecl = GetLValueBaseDecl(B); - if (!BDecl || ADecl->getCanonicalDecl() != BDecl->getCanonicalDecl()) - return false; - } + B.getLValueBase().getOpaqueValue()) + return false; - return IsGlobalLValue(A.getLValueBase()) || - (A.getLValueCallIndex() == B.getLValueCallIndex() && - A.getLValueVersion() == B.getLValueVersion()); + return A.getLValueCallIndex() == B.getLValueCallIndex() && + A.getLValueVersion() == B.getLValueVersion(); } static void NoteLValueLocation(EvalInfo &Info, APValue::LValueBase Base) { @@ -3108,7 +3101,8 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E, // If we're currently evaluating the initializer of this declaration, use that // in-flight value. 
- if (Info.EvaluatingDecl.dyn_cast() == VD) { + if (declaresSameEntity(Info.EvaluatingDecl.dyn_cast(), + VD)) { Result = Info.EvaluatingDeclValue; return true; } diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp index 8d51dbde71776..3720b277af7a9 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp @@ -24,11 +24,10 @@ constexpr double &ni3; // expected-error {{declaration of reference variable 'ni constexpr int nc1 = i; // expected-error {{constexpr variable 'nc1' must be initialized by a constant expression}} expected-note {{read of non-const variable 'i' is not allowed in a constant expression}} constexpr C nc2 = C(); // expected-error {{cannot have non-literal type 'const C'}} -int &f(); // expected-note {{declared here}} +int &f(); // expected-note 2{{declared here}} constexpr int &nc3 = f(); // expected-error {{constexpr variable 'nc3' must be initialized by a constant expression}} expected-note {{non-constexpr function 'f' cannot be used in a constant expression}} constexpr int nc4(i); // expected-error {{constexpr variable 'nc4' must be initialized by a constant expression}} expected-note {{read of non-const variable 'i' is not allowed in a constant expression}} constexpr C nc5((C())); // expected-error {{cannot have non-literal type 'const C'}} -int &f(); // expected-note {{here}} constexpr int &nc6(f()); // expected-error {{constexpr variable 'nc6' must be initialized by a constant expression}} expected-note {{non-constexpr function 'f'}} struct pixel { diff --git a/clang/test/OpenMP/ordered_messages.cpp b/clang/test/OpenMP/ordered_messages.cpp index f6b9dbd6d27fa..8a3a86443eb8c 100644 --- a/clang/test/OpenMP/ordered_messages.cpp +++ b/clang/test/OpenMP/ordered_messages.cpp @@ -16,6 +16,9 @@ void xxx(int argc) { } int foo(); +#if __cplusplus >= 201103L +// expected-note@-2 {{declared here}} +#endif template T foo() 
{ @@ -176,7 +179,7 @@ T foo() { int foo() { #if __cplusplus >= 201103L -// expected-note@-2 2 {{declared here}} +// expected-note@-2 {{declared here}} #endif int k; #pragma omp for ordered From 052dbe226cb3540c77cf0b3dc4a51a4ab7726b55 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Thu, 3 Sep 2020 15:33:07 -0700 Subject: [PATCH 155/465] Remove unused and dangerous overload of PerformImplicitConversion. Previously we had two overloads where the only real difference beyond parameter order was whether a reference parameter is const, where one overload treated the reference parameter as an in-parameter and the other treated it as an out-parameter! --- clang/include/clang/Sema/Sema.h | 4 ---- clang/lib/Sema/SemaOverload.cpp | 28 ++++++++++------------------ 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 174b424bb996a..ec449d6dd6be4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11224,10 +11224,6 @@ class Sema final { ExprResult PerformImplicitConversion(Expr *From, QualType ToType, AssignmentAction Action, bool AllowExplicit = false); - ExprResult PerformImplicitConversion(Expr *From, QualType ToType, - AssignmentAction Action, - bool AllowExplicit, - ImplicitConversionSequence& ICS); ExprResult PerformImplicitConversion(Expr *From, QualType ToType, const ImplicitConversionSequence& ICS, AssignmentAction Action, diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 21a9ad04d5008..71341e5688fe0 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1494,17 +1494,9 @@ Sema::TryImplicitConversion(Expr *From, QualType ToType, /// converted expression. Flavor is the kind of conversion we're /// performing, used in the error message. If @p AllowExplicit, /// explicit user-defined conversions are permitted. 
-ExprResult -Sema::PerformImplicitConversion(Expr *From, QualType ToType, - AssignmentAction Action, bool AllowExplicit) { - ImplicitConversionSequence ICS; - return PerformImplicitConversion(From, ToType, Action, AllowExplicit, ICS); -} - -ExprResult -Sema::PerformImplicitConversion(Expr *From, QualType ToType, - AssignmentAction Action, bool AllowExplicit, - ImplicitConversionSequence& ICS) { +ExprResult Sema::PerformImplicitConversion(Expr *From, QualType ToType, + AssignmentAction Action, + bool AllowExplicit) { if (checkPlaceholderForOverload(*this, From)) return ExprError(); @@ -1515,13 +1507,13 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, if (getLangOpts().ObjC) CheckObjCBridgeRelatedConversions(From->getBeginLoc(), ToType, From->getType(), From); - ICS = ::TryImplicitConversion(*this, From, ToType, - /*SuppressUserConversions=*/false, - AllowExplicit ? AllowedExplicit::All - : AllowedExplicit::None, - /*InOverloadResolution=*/false, - /*CStyle=*/false, AllowObjCWritebackConversion, - /*AllowObjCConversionOnExplicit=*/false); + ImplicitConversionSequence ICS = ::TryImplicitConversion( + *this, From, ToType, + /*SuppressUserConversions=*/false, + AllowExplicit ? AllowedExplicit::All : AllowedExplicit::None, + /*InOverloadResolution=*/false, + /*CStyle=*/false, AllowObjCWritebackConversion, + /*AllowObjCConversionOnExplicit=*/false); return PerformImplicitConversion(From, ToType, ICS, Action); } From bf41c4d29e44bfe3ae96c968e2e44761d5acb3ed Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 23 Oct 2019 12:46:37 -0400 Subject: [PATCH 156/465] [codegen] Ensure target flags are cleared/set properly. NFC. - When an operand is changed into an immediate value or like, ensure their target flags being cleared or set properly. 
Differential Revision: https://reviews.llvm.org/D87109 --- llvm/include/llvm/CodeGen/MachineOperand.h | 8 ++++---- llvm/lib/CodeGen/MachineOperand.cpp | 13 +++++++++---- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 5 ----- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 1 - llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 4 ---- 5 files changed, 13 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h index c4fe67c419cd6..b7e89cf4b133f 100644 --- a/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/llvm/include/llvm/CodeGen/MachineOperand.h @@ -728,12 +728,12 @@ class MachineOperand { /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. - void ChangeToImmediate(int64_t ImmVal); + void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags = 0); /// ChangeToFPImmediate - Replace this operand with a new FP immediate operand /// of the specified value. If an operand is known to be an FP immediate /// already, the setFPImm method should be used. - void ChangeToFPImmediate(const ConstantFP *FPImm); + void ChangeToFPImmediate(const ConstantFP *FPImm, unsigned TargetFlags = 0); /// ChangeToES - Replace this operand with a new external symbol operand. void ChangeToES(const char *SymName, unsigned TargetFlags = 0); @@ -743,10 +743,10 @@ class MachineOperand { unsigned TargetFlags = 0); /// ChangeToMCSymbol - Replace this operand with a new MC symbol operand. - void ChangeToMCSymbol(MCSymbol *Sym); + void ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags = 0); /// Replace this operand with a frame index. - void ChangeToFrameIndex(int Idx); + void ChangeToFrameIndex(int Idx, unsigned TargetFlags = 0); /// Replace this operand with a target index. 
void ChangeToTargetIndex(unsigned Idx, int64_t Offset, diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index ce33cdb28b1e4..76b69dfdcf718 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -153,22 +153,25 @@ void MachineOperand::removeRegFromUses() { /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. -void MachineOperand::ChangeToImmediate(int64_t ImmVal) { +void MachineOperand::ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); removeRegFromUses(); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; + setTargetFlags(TargetFlags); } -void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { +void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm, + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); removeRegFromUses(); OpKind = MO_FPImmediate; Contents.CFP = FPImm; + setTargetFlags(TargetFlags); } void MachineOperand::ChangeToES(const char *SymName, @@ -197,7 +200,7 @@ void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset, setTargetFlags(TargetFlags); } -void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { +void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an MCSymbol"); @@ -205,9 +208,10 @@ void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { OpKind = MO_MCSymbol; Contents.Sym = Sym; + setTargetFlags(TargetFlags); } -void MachineOperand::ChangeToFrameIndex(int Idx) { +void MachineOperand::ChangeToFrameIndex(int Idx, unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into a FrameIndex"); @@ -215,6 +219,7 @@ void 
MachineOperand::ChangeToFrameIndex(int Idx) { OpKind = MO_FrameIndex; setIndex(Idx); + setTargetFlags(TargetFlags); } void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index ab89257a57168..9a30d4fd6bd4a 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -282,9 +282,6 @@ static bool updateOperand(FoldCandidate &Fold, assert(!Fold.needsShrink() && "not handled"); if (Fold.isImm()) { - // FIXME: ChangeToImmediate should probably clear the subreg flags. It's - // reinterpreted as TargetFlags. - Old.setSubReg(0); Old.ChangeToImmediate(Fold.ImmToFold); return true; } @@ -834,8 +831,6 @@ void SIFoldOperands::foldOperand( UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32)); - // FIXME: ChangeToImmediate should clear subreg - UseMI->getOperand(1).setSubReg(0); if (OpToFold.isImm()) UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm()); else diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 74f8864640691..9aa28cff10868 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2656,7 +2656,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.setDesc(get(NewOpc)); UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue()); - UseMI.getOperand(1).setTargetFlags(0); UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); return true; } diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 9548c0f3d9c4a..8f718ce6cb466 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -86,13 +86,9 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) { - // It's possible 
to have only one component of a super-reg defined by - // a single mov, so we need to clear any subregister flag. - Src0.setSubReg(0); Src0.ChangeToImmediate(MovSrc.getImm()); ConstantFolded = true; } else if (MovSrc.isFI()) { - Src0.setSubReg(0); Src0.ChangeToFrameIndex(MovSrc.getIndex()); ConstantFolded = true; } else if (MovSrc.isGlobal()) { From aaf1a96408b1587b5fb80a3a7c424348cb09e577 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Thu, 3 Sep 2020 15:42:27 -0700 Subject: [PATCH 157/465] [DebugInfo] Add size to class declarations in debug info. This adds the size to forward declared class DITypes, if the size is known. Fixes an issue where we determine whether to emit fragments based on the type size, so fragments would sometimes be incorrectly emitted if there was no size. Bug: https://bugs.llvm.org/show_bug.cgi?id=47338 Differential Revision: https://reviews.llvm.org/D87062 --- clang/lib/CodeGen/CGDebugInfo.cpp | 4 ++++ clang/test/CodeGenCXX/debug-info-class.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 8a85a24910e4e..1fdb6814c7bd8 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1031,6 +1031,10 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, uint64_t Size = 0; uint32_t Align = 0; + const RecordDecl *D = RD->getDefinition(); + if (D && D->isCompleteDefinition()) + Size = CGM.getContext().getTypeSize(Ty); + llvm::DINode::DIFlags Flags = llvm::DINode::FlagFwdDecl; // Add flag to nontrivial forward declarations. 
To be consistent with MSVC, diff --git a/clang/test/CodeGenCXX/debug-info-class.cpp b/clang/test/CodeGenCXX/debug-info-class.cpp index 94d5a0f1f0820..e000532b8c3b1 100644 --- a/clang/test/CodeGenCXX/debug-info-class.cpp +++ b/clang/test/CodeGenCXX/debug-info-class.cpp @@ -136,7 +136,7 @@ int main(int argc, char **argv) { // CHECK: [[C_DTOR]] = !DISubprogram(name: "~C" // CHECK: [[D:![0-9]+]] = !DICompositeType(tag: DW_TAG_structure_type, name: "D" -// CHECK-NOT: size: +// CHECK-SAME: size: // CHECK-SAME: DIFlagFwdDecl // CHECK-NOT: identifier: // CHECK-SAME: ){{$}} From 673484b34189b1bccf73a2ec96968092bc8a26a7 Mon Sep 17 00:00:00 2001 From: Ryan Prichard Date: Thu, 3 Sep 2020 15:59:45 -0700 Subject: [PATCH 158/465] [libunwind] Minor SJLJ config cleanup. NFCI. Simplify: defined(__ARM_DWARF_EH__) || !defined(__arm__) to: !defined(_LIBUNWIND_ARM_EHABI) A later patch benefits from the simplicity. This change will result in the two DWARF macros being defined when __USING_SJLJ_EXCEPTIONS__ is defined, but: * That's already the case with the __APPLE__ and _WIN32 clauses. * That's also already the case with other architectures. * With __USING_SJLJ_EXCEPTIONS__, most of the unwinder is #ifdef'ed away. Generally, when __USING_SJLJ_EXCEPTIONS__ is defined, most of the libunwind code is removed by the preprocessor. e.g. None of the hpp files are included, and almost all of the .c and .cpp files are defined away, except in Unwind-sjlj.c. Unwind_AppleExtras.cpp is an exception because it includes two hpp files, which it doesn't use. Remove the unneeded includes for consistency with the general rule. 
Reviewed By: steven_wu Differential Revision: https://reviews.llvm.org/D86767 --- libunwind/src/Unwind_AppleExtras.cpp | 2 -- libunwind/src/config.h | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/libunwind/src/Unwind_AppleExtras.cpp b/libunwind/src/Unwind_AppleExtras.cpp index 1d9948aced355..e3d41ca2b4e92 100644 --- a/libunwind/src/Unwind_AppleExtras.cpp +++ b/libunwind/src/Unwind_AppleExtras.cpp @@ -8,8 +8,6 @@ //===----------------------------------------------------------------------===// #include "config.h" -#include "AddressSpace.hpp" -#include "DwarfParser.hpp" // static linker symbols to prevent wrong two level namespace for _Unwind symbols diff --git a/libunwind/src/config.h b/libunwind/src/config.h index 2014b8cb77abd..fd177dd7338c1 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -18,6 +18,8 @@ #include #include +#include <__libunwind_config.h> + // Platform specific configuration defines. #ifdef __APPLE__ #if defined(FOR_DYLD) @@ -33,7 +35,7 @@ #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #endif #else - #if defined(__ARM_DWARF_EH__) || !defined(__arm__) + #if !defined(_LIBUNWIND_ARM_EHABI) #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 #endif @@ -81,6 +83,8 @@ #error Unsupported target #endif +// Apple/armv7k defaults to DWARF/Compact unwinding, but its libunwind also +// needs to include the SJLJ APIs. #if (defined(__APPLE__) && defined(__arm__)) || defined(__USING_SJLJ_EXCEPTIONS__) #define _LIBUNWIND_BUILD_SJLJ_APIS #endif From 08513505571b0167f387c4e957ddcc138f77a9ca Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 3 Sep 2020 16:19:10 -0700 Subject: [PATCH 159/465] [X86] Update stale comment. NFC The optimization in ExpandIntOp_UINT_TO_FP was removed in D72728 in January 2020. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6560ab2bcccdd..7a97527ecdf25 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20149,10 +20149,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue Store = DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Align(8)); // For i64 source, we need to add the appropriate power of 2 if the input - // was negative. This is the same as the optimization in - // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here, - // we must be careful to do the computation in x87 extended precision, not - // in SSE. (The generic code can't know it's OK to do this, or how to.) + // was negative. We must be careful to do the computation in x87 extended + // precision, not in SSE. SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot }; SDValue Fild = From 3c2a7bd2867eda29da8ddf5f2db2e006d504e611 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Sep 2020 11:36:22 -0400 Subject: [PATCH 160/465] AMDGPU: Remove code to handle tied si_else operands This has not used tied operands for a long time. 
--- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 13 +------------ llvm/test/CodeGen/AMDGPU/collapse-endcf.mir | 9 ++++----- .../CodeGen/AMDGPU/control-flow-fastregalloc.ll | 12 ++++++------ .../AMDGPU/lower-control-flow-other-terminators.mir | 11 +++++------ 4 files changed, 16 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 8488e86fbc297..0246c6508e9f2 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -335,21 +335,13 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { bool ExecModified = MI.getOperand(3).getImm() != 0; MachineBasicBlock::iterator Start = MBB.begin(); - // We are running before TwoAddressInstructions, and si_else's operands are - // tied. In order to correctly tie the registers, split this into a copy of - // the src like it does. - Register CopyReg = MRI->createVirtualRegister(BoolRC); - MachineInstr *CopyExec = - BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg) - .add(MI.getOperand(1)); // Saved EXEC - // This must be inserted before phis and any spill code inserted before the // else. Register SaveReg = ExecModified ? MRI->createVirtualRegister(BoolRC) : DstReg; MachineInstr *OrSaveExec = BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg) - .addReg(CopyReg); + .add(MI.getOperand(1)); // Saved EXEC MachineBasicBlock *DestBB = MI.getOperand(2).getMBB(); @@ -386,16 +378,13 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { LIS->RemoveMachineInstrFromMaps(MI); MI.eraseFromParent(); - LIS->InsertMachineInstrInMaps(*CopyExec); LIS->InsertMachineInstrInMaps(*OrSaveExec); LIS->InsertMachineInstrInMaps(*Xor); LIS->InsertMachineInstrInMaps(*Branch); - // src reg is tied to dst reg. 
LIS->removeInterval(DstReg); LIS->createAndComputeVirtRegInterval(DstReg); - LIS->createAndComputeVirtRegInterval(CopyReg); if (ExecModified) LIS->createAndComputeVirtRegInterval(SaveReg); diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir index 815251e3560ce..d50973c9abf99 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -484,21 +484,20 @@ body: | ; GCN: S_BRANCH %bb.2 ; GCN: bb.2: ; GCN: successors: %bb.3(0x40000000), %bb.6(0x40000000) - ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_XOR_B64_]] - ; GCN: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 [[COPY1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 [[S_XOR_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GCN: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc ; GCN: S_CBRANCH_EXECZ %bb.6, implicit $exec ; GCN: bb.3: ; GCN: successors: %bb.3(0x40000000), %bb.4(0x40000000) - ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec - ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], undef %4:sreg_64, implicit-def dead $scc + ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec + ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %4:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) ; GCN: bb.5: ; GCN: successors: %bb.6(0x80000000) - ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc + ; GCN: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc ; GCN: bb.6: ; GCN: $exec = S_OR_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc ; GCN: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 
9d66f849391d1..6da332a596fb0 100644 --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -198,23 +198,23 @@ end: ; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 0 ; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 1 -; GCN: s_or_saveexec_b64 s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} +; GCN: s_or_saveexec_b64 s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]{{\]}}, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} ; Regular spill value restored after exec modification ; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload ; Spill saved exec -; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]], [[FLOW_SAVEEXEC_LO_LANE:[0-9]+]] -; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]], [[FLOW_SAVEEXEC_HI_LANE:[0-9]+]] +; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC]], [[FLOW_SAVEEXEC_LO_LANE:[0-9]+]] +; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC]], [[FLOW_SAVEEXEC_HI_LANE:[0-9]+]] -; VMEM: v_writelane_b32 v[[FLOW_V_SAVEEXEC:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]], 0 -; VMEM: v_writelane_b32 v[[FLOW_V_SAVEEXEC]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]], 1 +; VMEM: v_writelane_b32 v[[FLOW_V_SAVEEXEC:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC]], 0 +; VMEM: v_writelane_b32 v[[FLOW_V_SAVEEXEC]], s[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC]], 1 ; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC]], off, s[0:3], 0 offset:[[FLOW_SAVEEXEC_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], 0 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: s_xor_b64 exec, 
exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} +; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC]]{{\]}} ; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir index 08e6f1a067ac5..faea2df6b517b 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir @@ -100,13 +100,12 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %2 - ; CHECK: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_SAVEEXEC_B64 [[COPY]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 - ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY1]], implicit $exec + ; CHECK: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_SAVEEXEC_B64 %2, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY2]], implicit $exec + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec ; CHECK: S_BRANCH %bb.2 ; CHECK: bb.1: From 2dd9a4d855f6a6d3e84dd4d1e489e6265d206732 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 3 Sep 2020 18:06:08 
-0700 Subject: [PATCH 161/465] [SmallVector] Include stdexcept if LLVM_ENABLE_EXCEPTIONS std::length_error needs stdexcept. --- llvm/include/llvm/ADT/SmallVector.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 5d8658f612718..8197b8688a529 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -32,6 +32,9 @@ #include #include #include +#ifdef LLVM_ENABLE_EXCEPTIONS +#include +#endif namespace llvm { From a803ddc522ed69d103d6b6feef0318a2d16d53a7 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 3 Sep 2020 18:48:03 -0700 Subject: [PATCH 162/465] [NFC][Asan] Add FIXME into GetAsanChunk --- compiler-rt/lib/asan/asan_allocator.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 448dece0a1703..16b264080b5ac 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -744,6 +744,9 @@ struct Allocator { uptr *alloc_magic = reinterpret_cast(alloc_beg); if (alloc_magic[0] == kAllocBegMagic) return reinterpret_cast(alloc_magic[1]); + // FIXME: This is either valid small chunk with tiny redzine or invalid + // chunk which is beeing allocated/deallocated. The latter case should + // return nullptr like secondary allocator does. return reinterpret_cast(alloc_beg); } From 99a93c3a223e3bfc9a9781bfbf98d2fd4551f923 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 3 Sep 2020 18:54:52 -0700 Subject: [PATCH 163/465] [NFC][Asan] Rename internal enum value. New name better represents the state of chunk. 
--- compiler-rt/lib/asan/asan_allocator.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 16b264080b5ac..c7c9d7a7b3ce4 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -106,8 +106,11 @@ COMPILER_CHECK(kChunkHeader2Size <= 16); // CHUNK_ALLOCATED: the chunk is allocated and not yet freed. // CHUNK_QUARANTINE: the chunk was freed and put into quarantine zone. enum { - CHUNK_AVAILABLE = 0, // 0 is the default value even if we didn't set it. - CHUNK_ALLOCATED = 2, + // Either just allocated by underlying allocator, but AsanChunk is not yet + // ready, or almost returned to undelying allocator and AsanChunk is already + // meaningless. + CHUNK_INVALID = 0, + CHUNK_ALLOCATED = 2, CHUNK_QUARANTINE = 3 }; @@ -142,8 +145,7 @@ struct QuarantineCallback { void Recycle(AsanChunk *m) { u8 old_chunk_state = CHUNK_QUARANTINE; if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, - CHUNK_AVAILABLE, - memory_order_acquire)) { + CHUNK_INVALID, memory_order_acquire)) { CHECK_EQ(old_chunk_state, CHUNK_QUARANTINE); } @@ -859,7 +861,7 @@ static AsanAllocator &get_allocator() { bool AsanChunkView::IsValid() const { return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) != - CHUNK_AVAILABLE; + CHUNK_INVALID; } bool AsanChunkView::IsAllocated() const { return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == From 0ac81333ebc3d75f3c8dfe0ecc5a506c8ef480d7 Mon Sep 17 00:00:00 2001 From: Puyan Lotfi Date: Thu, 3 Sep 2020 22:42:27 -0400 Subject: [PATCH 164/465] [NFC] Adding pythonenv* to .gitignore The new feature in GitHub called 'GitHub Codespaces' generates a pythonenv3.8 directory in the root level of the llvm-project git checkout. So I am adding that directory to the .gitignore. 
See the following for more info: https://github.com/features/codespaces Differential Revision: https://reviews.llvm.org/D86846 --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 1c8d4b15925de..5e937552c5f85 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,8 @@ autoconf/autom4te.cache # VS2017 and VSCode config files. .vscode .vs +# pythonenv for github Codespaces +pythonenv* # clangd index. (".clangd" is a config file now, thus trailing slash) .clangd/ .cache From 060c9dd1cc467cbeb6cf1c29dd44d07f562606b4 Mon Sep 17 00:00:00 2001 From: aartbik Date: Thu, 3 Sep 2020 15:57:25 -0700 Subject: [PATCH 165/465] [mlir] [VectorOps] Improve SIMD compares with narrower indices When allowed, use 32-bit indices rather than 64-bit indices in the SIMD computation of masks. This runs up to 2x and 4x faster on a number of AVX2 and AVX512 microbenchmarks. Reviewed By: bkramer Differential Revision: https://reviews.llvm.org/D87116 --- mlir/include/mlir/Conversion/Passes.td | 5 +- .../VectorToLLVM/ConvertVectorToLLVM.h | 12 +- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 145 ++++++++++++------ mlir/lib/Dialect/Vector/VectorTransforms.cpp | 20 +-- .../VectorToLLVM/vector-mask-to-llvm.mlir | 48 ++++++ .../VectorToLLVM/vector-to-llvm.mlir | 18 +-- .../Vector/vector-contract-transforms.mlir | 64 +++++--- 7 files changed, 218 insertions(+), 94 deletions(-) create mode 100644 mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 6686e28658138..1b27a7308c7a0 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -358,7 +358,10 @@ def ConvertVectorToLLVM : Pass<"convert-vector-to-llvm", "ModuleOp"> { let options = [ Option<"reassociateFPReductions", "reassociate-fp-reductions", "bool", /*default=*/"false", - "Allows llvm to reassociate floating-point reductions for speed"> + "Allows llvm to 
reassociate floating-point reductions for speed">, + Option<"enableIndexOptimizations", "enable-index-optimizations", + "bool", /*default=*/"false", + "Allows compiler to assume indices fit in 32-bit if that yields faster code"> ]; } diff --git a/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h b/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h index 82aa8287d90f3..81ffa63281357 100644 --- a/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h +++ b/mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h @@ -22,8 +22,13 @@ class OperationPass; /// ConvertVectorToLLVM pass in include/mlir/Conversion/Passes.td struct LowerVectorToLLVMOptions { bool reassociateFPReductions = false; - LowerVectorToLLVMOptions &setReassociateFPReductions(bool r) { - reassociateFPReductions = r; + bool enableIndexOptimizations = false; + LowerVectorToLLVMOptions &setReassociateFPReductions(bool b) { + reassociateFPReductions = b; + return *this; + } + LowerVectorToLLVMOptions &setEnableIndexOptimizations(bool b) { + enableIndexOptimizations = b; return *this; } }; @@ -37,7 +42,8 @@ void populateVectorToLLVMMatrixConversionPatterns( /// Collect a set of patterns to convert from the Vector dialect to LLVM. void populateVectorToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, - bool reassociateFPReductions = false); + bool reassociateFPReductions = false, + bool enableIndexOptimizations = false); /// Create a pass to convert vector operations to the LLVMIR dialect. 
std::unique_ptr> createConvertVectorToLLVMPass( diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index ecb047a1ad143..dfa204d17389a 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -117,6 +117,49 @@ static SmallVector getI64SubArray(ArrayAttr arrayAttr, return res; } +// Helper that returns a vector comparison that constructs a mask: +// mask = [0,1,..,n-1] + [o,o,..,o] < [b,b,..,b] +// +// NOTE: The LLVM::GetActiveLaneMaskOp intrinsic would provide an alternative, +// much more compact, IR for this operation, but LLVM eventually +// generates more elaborate instructions for this intrinsic since it +// is very conservative on the boundary conditions. +static Value buildVectorComparison(ConversionPatternRewriter &rewriter, + Operation *op, bool enableIndexOptimizations, + int64_t dim, Value b, Value *off = nullptr) { + auto loc = op->getLoc(); + // If we can assume all indices fit in 32-bit, we perform the vector + // comparison in 32-bit to get a higher degree of SIMD parallelism. + // Otherwise we perform the vector comparison using 64-bit indices. + Value indices; + Type idxType; + if (enableIndexOptimizations) { + SmallVector values(dim); + for (int64_t d = 0; d < dim; d++) + values[d] = d; + indices = + rewriter.create(loc, rewriter.getI32VectorAttr(values)); + idxType = rewriter.getI32Type(); + } else { + SmallVector values(dim); + for (int64_t d = 0; d < dim; d++) + values[d] = d; + indices = + rewriter.create(loc, rewriter.getI64VectorAttr(values)); + idxType = rewriter.getI64Type(); + } + // Add in an offset if requested. + if (off) { + Value o = rewriter.create(loc, idxType, *off); + Value ov = rewriter.create(loc, indices.getType(), o); + indices = rewriter.create(loc, ov, indices); + } + // Construct the vector comparison. 
+ Value bound = rewriter.create(loc, idxType, b); + Value bounds = rewriter.create(loc, indices.getType(), bound); + return rewriter.create(loc, CmpIPredicate::slt, indices, bounds); +} + // Helper that returns data layout alignment of an operation with memref. template LogicalResult getMemRefAlignment(LLVMTypeConverter &typeConverter, T op, @@ -512,10 +555,10 @@ class VectorReductionOpConversion : public ConvertToLLVMPattern { public: explicit VectorReductionOpConversion(MLIRContext *context, LLVMTypeConverter &typeConverter, - bool reassociateFP) + bool reassociateFPRed) : ConvertToLLVMPattern(vector::ReductionOp::getOperationName(), context, typeConverter), - reassociateFPReductions(reassociateFP) {} + reassociateFPReductions(reassociateFPRed) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, @@ -589,6 +632,34 @@ class VectorReductionOpConversion : public ConvertToLLVMPattern { const bool reassociateFPReductions; }; +/// Conversion pattern for a vector.create_mask (1-D only). 
+class VectorCreateMaskOpConversion : public ConvertToLLVMPattern { +public: + explicit VectorCreateMaskOpConversion(MLIRContext *context, + LLVMTypeConverter &typeConverter, + bool enableIndexOpt) + : ConvertToLLVMPattern(vector::CreateMaskOp::getOperationName(), context, + typeConverter), + enableIndexOptimizations(enableIndexOpt) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + auto dstType = op->getResult(0).getType().cast(); + int64_t rank = dstType.getRank(); + if (rank == 1) { + rewriter.replaceOp( + op, buildVectorComparison(rewriter, op, enableIndexOptimizations, + dstType.getDimSize(0), operands[0])); + return success(); + } + return failure(); + } + +private: + const bool enableIndexOptimizations; +}; + class VectorShuffleOpConversion : public ConvertToLLVMPattern { public: explicit VectorShuffleOpConversion(MLIRContext *context, @@ -1121,17 +1192,19 @@ class VectorTypeCastOpConversion : public ConvertToLLVMPattern { /// Conversion pattern that converts a 1-D vector transfer read/write op in a /// sequence of: -/// 1. Bitcast or addrspacecast to vector form. -/// 2. Create an offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. -/// 3. Create a mask where offsetVector is compared against memref upper bound. -/// 4. Rewrite op as a masked read or write. +/// 1. Get the source/dst address as an LLVM vector pointer. +/// 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. +/// 3. Create an offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. +/// 4. Create a mask where offsetVector is compared against memref upper bound. +/// 5. Rewrite op as a masked read or write. 
template class VectorTransferConversion : public ConvertToLLVMPattern { public: explicit VectorTransferConversion(MLIRContext *context, - LLVMTypeConverter &typeConv) - : ConvertToLLVMPattern(ConcreteOp::getOperationName(), context, - typeConv) {} + LLVMTypeConverter &typeConv, + bool enableIndexOpt) + : ConvertToLLVMPattern(ConcreteOp::getOperationName(), context, typeConv), + enableIndexOptimizations(enableIndexOpt) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, @@ -1155,7 +1228,6 @@ class VectorTransferConversion : public ConvertToLLVMPattern { auto toLLVMTy = [&](Type t) { return typeConverter.convertType(t); }; Location loc = op->getLoc(); - Type i64Type = rewriter.getIntegerType(64); MemRefType memRefType = xferOp.getMemRefType(); if (auto memrefVectorElementType = @@ -1202,41 +1274,26 @@ class VectorTransferConversion : public ConvertToLLVMPattern { xferOp, operands, vectorDataPtr); // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. - unsigned vecWidth = vecTy.getVectorNumElements(); - VectorType vectorCmpType = VectorType::get(vecWidth, i64Type); - SmallVector indices; - indices.reserve(vecWidth); - for (unsigned i = 0; i < vecWidth; ++i) - indices.push_back(i); - Value linearIndices = rewriter.create( - loc, vectorCmpType, - DenseElementsAttr::get(vectorCmpType, ArrayRef(indices))); - linearIndices = rewriter.create( - loc, toLLVMTy(vectorCmpType), linearIndices); - // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. - // TODO: when the leaf transfer rank is k > 1 we need the last - // `k` dimensions here. - unsigned lastIndex = llvm::size(xferOp.indices()) - 1; - Value offsetIndex = *(xferOp.indices().begin() + lastIndex); - offsetIndex = rewriter.create(loc, i64Type, offsetIndex); - Value base = rewriter.create(loc, vectorCmpType, offsetIndex); - Value offsetVector = rewriter.create(loc, base, linearIndices); - // 4. 
Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ] + // + // TODO: when the leaf transfer rank is k > 1, we need the last `k` + // dimensions here. + unsigned vecWidth = vecTy.getVectorNumElements(); + unsigned lastIndex = llvm::size(xferOp.indices()) - 1; + Value off = *(xferOp.indices().begin() + lastIndex); Value dim = rewriter.create(loc, xferOp.memref(), lastIndex); - dim = rewriter.create(loc, i64Type, dim); - dim = rewriter.create(loc, vectorCmpType, dim); - Value mask = - rewriter.create(loc, CmpIPredicate::slt, offsetVector, dim); - mask = rewriter.create(loc, toLLVMTy(mask.getType()), - mask); + Value mask = buildVectorComparison(rewriter, op, enableIndexOptimizations, + vecWidth, dim, &off); // 5. Rewrite as a masked read / write. return replaceTransferOpWithMasked(rewriter, typeConverter, loc, xferOp, operands, vectorDataPtr, mask); } + +private: + const bool enableIndexOptimizations; }; class VectorPrintOpConversion : public ConvertToLLVMPattern { @@ -1444,7 +1501,7 @@ class VectorExtractStridedSliceOpConversion /// Populate the given list with patterns that convert from Vector to LLVM. 
void mlir::populateVectorToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, - bool reassociateFPReductions) { + bool reassociateFPReductions, bool enableIndexOptimizations) { MLIRContext *ctx = converter.getDialect()->getContext(); // clang-format off patterns.insert(ctx); patterns.insert( ctx, converter, reassociateFPReductions); + patterns.insert, + VectorTransferConversion>( + ctx, converter, enableIndexOptimizations); patterns .insert, - VectorTransferConversion, VectorTypeCastOpConversion, VectorMaskedLoadOpConversion, VectorMaskedStoreOpConversion, @@ -1485,6 +1544,7 @@ struct LowerVectorToLLVMPass : public ConvertVectorToLLVMBase { LowerVectorToLLVMPass(const LowerVectorToLLVMOptions &options) { this->reassociateFPReductions = options.reassociateFPReductions; + this->enableIndexOptimizations = options.enableIndexOptimizations; } void runOnOperation() override; }; @@ -1505,15 +1565,14 @@ void LowerVectorToLLVMPass::runOnOperation() { LLVMTypeConverter converter(&getContext()); OwningRewritePatternList patterns; populateVectorToLLVMMatrixConversionPatterns(converter, patterns); - populateVectorToLLVMConversionPatterns(converter, patterns, - reassociateFPReductions); + populateVectorToLLVMConversionPatterns( + converter, patterns, reassociateFPReductions, enableIndexOptimizations); populateVectorToLLVMMatrixConversionPatterns(converter, patterns); populateStdToLLVMConversionPatterns(converter, patterns); LLVMConversionTarget target(getContext()); - if (failed(applyPartialConversion(getOperation(), target, patterns))) { + if (failed(applyPartialConversion(getOperation(), target, patterns))) signalPassFailure(); - } } std::unique_ptr> diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 16d10e558b5eb..332bfbe2f4577 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -1347,7 +1347,8 @@ class 
ConstantMaskOpLowering : public OpRewritePattern { auto eltType = dstType.getElementType(); auto dimSizes = op.mask_dim_sizes(); int64_t rank = dimSizes.size(); - int64_t trueDim = dimSizes[0].cast().getInt(); + int64_t trueDim = std::min(dstType.getDimSize(0), + dimSizes[0].cast().getInt()); if (rank == 1) { // Express constant 1-D case in explicit vector form: @@ -1402,21 +1403,8 @@ class CreateMaskOpLowering : public OpRewritePattern { int64_t rank = dstType.getRank(); Value idx = op.getOperand(0); - if (rank == 1) { - // Express dynamic 1-D case in explicit vector form: - // mask = [0,1,..,n-1] < [a,a,..,a] - SmallVector values(dim); - for (int64_t d = 0; d < dim; d++) - values[d] = d; - Value indices = - rewriter.create(loc, rewriter.getI64VectorAttr(values)); - Value bound = - rewriter.create(loc, rewriter.getI64Type(), idx); - Value bounds = rewriter.create(loc, indices.getType(), bound); - rewriter.replaceOpWithNewOp(op, CmpIPredicate::slt, indices, - bounds); - return success(); - } + if (rank == 1) + return failure(); // leave for lowering VectorType lowType = VectorType::get(dstType.getShape().drop_front(), eltType); diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir new file mode 100644 index 0000000000000..ec05e349897a7 --- /dev/null +++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir @@ -0,0 +1,48 @@ +// RUN: mlir-opt %s --convert-vector-to-llvm='enable-index-optimizations=1' | FileCheck %s --check-prefix=CMP32 +// RUN: mlir-opt %s --convert-vector-to-llvm='enable-index-optimizations=0' | FileCheck %s --check-prefix=CMP64 + +// CMP32-LABEL: llvm.func @genbool_var_1d( +// CMP32-SAME: %[[A:.*]]: !llvm.i64) +// CMP32: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>) : !llvm.vec<11 x i32> +// CMP32: %[[T1:.*]] = llvm.trunc %[[A]] : !llvm.i64 to !llvm.i32 +// CMP32: %[[T2:.*]] = llvm.mlir.undef : !llvm.vec<11 x i32> +// 
CMP32: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CMP32: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%[[T3]] : !llvm.i32] : !llvm.vec<11 x i32> +// CMP32: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<11 x i32>, !llvm.vec<11 x i32> +// CMP32: %[[T6:.*]] = llvm.icmp "slt" %[[T0]], %[[T5]] : !llvm.vec<11 x i32> +// CMP32: llvm.return %[[T6]] : !llvm.vec<11 x i1> + +// CMP64-LABEL: llvm.func @genbool_var_1d( +// CMP64-SAME: %[[A:.*]]: !llvm.i64) +// CMP64: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>) : !llvm.vec<11 x i64> +// CMP64: %[[T1:.*]] = llvm.mlir.undef : !llvm.vec<11 x i64> +// CMP64: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CMP64: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : !llvm.i32] : !llvm.vec<11 x i64> +// CMP64: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T1]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<11 x i64>, !llvm.vec<11 x i64> +// CMP64: %[[T5:.*]] = llvm.icmp "slt" %[[T0]], %[[T4]] : !llvm.vec<11 x i64> +// CMP64: llvm.return %[[T5]] : !llvm.vec<11 x i1> + +func @genbool_var_1d(%arg0: index) -> vector<11xi1> { + %0 = vector.create_mask %arg0 : vector<11xi1> + return %0 : vector<11xi1> +} + +// CMP32-LABEL: llvm.func @transfer_read_1d +// CMP32: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>) : !llvm.vec<16 x i32> +// CMP32: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : !llvm.vec<16 x i32> +// CMP32: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : !llvm.vec<16 x i32> +// CMP32: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}} +// CMP32: llvm.return %[[L]] : !llvm.vec<16 x float> + +// CMP64-LABEL: llvm.func @transfer_read_1d +// CMP64: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15]> : vector<16xi64>) : !llvm.vec<16 x i64> +// CMP64: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : !llvm.vec<16 x i64> +// CMP64: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : !llvm.vec<16 x i64> +// CMP64: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}} +// CMP64: llvm.return %[[L]] : !llvm.vec<16 x float> + +func @transfer_read_1d(%A : memref, %i: index) -> vector<16xf32> { + %d = constant -1.0: f32 + %f = vector.transfer_read %A[%i], %d {permutation_map = affine_map<(d0) -> (d0)>} : memref, vector<16xf32> + return %f : vector<16xf32> +} diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index d35c7fa645b7f..e0800c2fd2272 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -749,10 +749,12 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // CHECK-SAME: (!llvm.ptr, !llvm.i64) -> !llvm.ptr // CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : // CHECK-SAME: !llvm.ptr to !llvm.ptr> +// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] : +// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. -// CHECK: %[[linearIndex:.*]] = llvm.mlir.constant( -// CHECK-SAME: dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : +// CHECK: %[[linearIndex:.*]] = llvm.mlir.constant(dense +// CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : // CHECK-SAME: vector<17xi64>) : !llvm.vec<17 x i64> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. @@ -770,8 +772,6 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // // 4. Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. 
dim ] -// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] : -// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> // CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[DIM]], %[[dimVec]][%[[c01]] : @@ -799,9 +799,9 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // CHECK-SAME: !llvm.ptr to !llvm.ptr> // // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. -// CHECK: %[[linearIndex_b:.*]] = llvm.mlir.constant( -// CHECK-SAME: dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : -// CHECK-SAME: vector<17xi64>) : !llvm.vec<17 x i64> +// CHECK: %[[linearIndex_b:.*]] = llvm.mlir.constant(dense +// CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : +// CHECK-SAME: vector<17xi64>) : !llvm.vec<17 x i64> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. // CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, @@ -832,6 +832,8 @@ func @transfer_read_2d_to_1d(%A : memref, %base0: index, %base1: index) } // CHECK-LABEL: func @transfer_read_2d_to_1d // CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: !llvm.i64, %[[BASE_1:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm.vec<17 x float> +// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 1] : +// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // // Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. // CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> @@ -847,8 +849,6 @@ func @transfer_read_2d_to_1d(%A : memref, %base0: index, %base1: index) // Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. 
dim ] // Here we check we properly use %DIM[1] -// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 1] : -// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> // CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[DIM]], %[[dimVec]][%[[c01]] : diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index e34e3428c185e..aaaa7adf6472c 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -785,43 +785,63 @@ func @genbool_3d() -> vector<2x3x4xi1> { return %v: vector<2x3x4xi1> } -// CHECK-LABEL: func @genbool_var_1d -// CHECK-SAME: %[[A:.*]]: index -// CHECK: %[[C1:.*]] = constant dense<[0, 1, 2]> : vector<3xi64> -// CHECK: %[[T0:.*]] = index_cast %[[A]] : index to i64 -// CHECK: %[[T1:.*]] = splat %[[T0]] : vector<3xi64> -// CHECK: %[[T2:.*]] = cmpi "slt", %[[C1]], %[[T1]] : vector<3xi64> -// CHECK: return %[[T2]] : vector<3xi1> +// CHECK-LABEL: func @genbool_var_1d( +// CHECK-SAME: %[[A:.*]]: index) +// CHECK: %[[T0:.*]] = vector.create_mask %[[A]] : vector<3xi1> +// CHECK: return %[[T0]] : vector<3xi1> func @genbool_var_1d(%arg0: index) -> vector<3xi1> { %0 = vector.create_mask %arg0 : vector<3xi1> return %0 : vector<3xi1> } -// CHECK-LABEL: func @genbool_var_2d -// CHECK-SAME: %[[A:.*0]]: index -// CHECK-SAME: %[[B:.*1]]: index -// CHECK: %[[CI:.*]] = constant dense<[0, 1, 2]> : vector<3xi64> -// CHECK: %[[CF:.*]] = constant dense : vector<3xi1> +// CHECK-LABEL: func @genbool_var_2d( +// CHECK-SAME: %[[A:.*0]]: index, +// CHECK-SAME: %[[B:.*1]]: index) +// CHECK: %[[C1:.*]] = constant dense : vector<3xi1> // CHECK: %[[C2:.*]] = constant dense : vector<2x3xi1> // CHECK: %[[c0:.*]] = constant 0 : index // CHECK: %[[c1:.*]] = constant 1 : index -// CHECK: %[[T0:.*]] = 
index_cast %[[B]] : index to i64 -// CHECK: %[[T1:.*]] = splat %[[T0]] : vector<3xi64> -// CHECK: %[[T2:.*]] = cmpi "slt", %[[CI]], %[[T1]] : vector<3xi64> -// CHECK: %[[T3:.*]] = cmpi "slt", %[[c0]], %[[A]] : index -// CHECK: %[[T4:.*]] = select %[[T3]], %[[T2]], %[[CF]] : vector<3xi1> -// CHECK: %[[T5:.*]] = vector.insert %[[T4]], %[[C2]] [0] : vector<3xi1> into vector<2x3xi1> -// CHECK: %[[T6:.*]] = cmpi "slt", %[[c1]], %[[A]] : index -// CHECK: %[[T7:.*]] = select %[[T6]], %[[T2]], %[[CF]] : vector<3xi1> -// CHECK: %[[T8:.*]] = vector.insert %[[T7]], %[[T5]] [1] : vector<3xi1> into vector<2x3xi1> -// CHECK: return %[[T8]] : vector<2x3xi1> +// CHECK: %[[T0:.*]] = vector.create_mask %[[B]] : vector<3xi1> +// CHECK: %[[T1:.*]] = cmpi "slt", %[[c0]], %[[A]] : index +// CHECK: %[[T2:.*]] = select %[[T1]], %[[T0]], %[[C1]] : vector<3xi1> +// CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C2]] [0] : vector<3xi1> into vector<2x3xi1> +// CHECK: %[[T4:.*]] = cmpi "slt", %[[c1]], %[[A]] : index +// CHECK: %[[T5:.*]] = select %[[T4]], %[[T0]], %[[C1]] : vector<3xi1> +// CHECK: %[[T6:.*]] = vector.insert %[[T5]], %[[T3]] [1] : vector<3xi1> into vector<2x3xi1> +// CHECK: return %[[T6]] : vector<2x3xi1> func @genbool_var_2d(%arg0: index, %arg1: index) -> vector<2x3xi1> { %0 = vector.create_mask %arg0, %arg1 : vector<2x3xi1> return %0 : vector<2x3xi1> } +// CHECK-LABEL: func @genbool_var_3d( +// CHECK-SAME: %[[A:.*0]]: index, +// CHECK-SAME: %[[B:.*1]]: index, +// CHECK-SAME: %[[C:.*2]]: index) +// CHECK: %[[C1:.*]] = constant dense : vector<7xi1> +// CHECK: %[[C2:.*]] = constant dense : vector<1x7xi1> +// CHECK: %[[C3:.*]] = constant dense : vector<2x1x7xi1> +// CHECK: %[[c0:.*]] = constant 0 : index +// CHECK: %[[c1:.*]] = constant 1 : index +// CHECK: %[[T0:.*]] = vector.create_mask %[[C]] : vector<7xi1> +// CHECK: %[[T1:.*]] = cmpi "slt", %[[c0]], %[[B]] : index +// CHECK: %[[T2:.*]] = select %[[T1]], %[[T0]], %[[C1]] : vector<7xi1> +// CHECK: %[[T3:.*]] = vector.insert 
%[[T2]], %[[C2]] [0] : vector<7xi1> into vector<1x7xi1> +// CHECK: %[[T4:.*]] = cmpi "slt", %[[c0]], %[[A]] : index +// CHECK: %[[T5:.*]] = select %[[T4]], %[[T3]], %[[C2]] : vector<1x7xi1> +// CHECK: %[[T6:.*]] = vector.insert %[[T5]], %[[C3]] [0] : vector<1x7xi1> into vector<2x1x7xi1> +// CHECK: %[[T7:.*]] = cmpi "slt", %[[c1]], %[[A]] : index +// CHECK: %[[T8:.*]] = select %[[T7]], %[[T3]], %[[C2]] : vector<1x7xi1> +// CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T6]] [1] : vector<1x7xi1> into vector<2x1x7xi1> +// CHECK: return %[[T9]] : vector<2x1x7xi1> + +func @genbool_var_3d(%arg0: index, %arg1: index, %arg2: index) -> vector<2x1x7xi1> { + %0 = vector.create_mask %arg0, %arg1, %arg2 : vector<2x1x7xi1> + return %0 : vector<2x1x7xi1> +} + #matmat_accesses_0 = [ affine_map<(m, n, k) -> (m, k)>, affine_map<(m, n, k) -> (k, n)>, From 8514ecb02d4330bc075b9c8fef77c87810088d2f Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Mon, 17 Aug 2020 16:24:45 -0700 Subject: [PATCH 166/465] [libc] Add implementations of remquo[f|l] and remainder[f|l]. The implementation is not fully standards compliant in the sense that errno is not set on error, and floating point exceptions are not raised. Subnormal range and normal range are tested separately in the tests. 
Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D86666 --- libc/config/linux/api.td | 6 + libc/config/linux/x86_64/entrypoints.txt | 6 + libc/spec/stdc.td | 8 ++ libc/src/math/CMakeLists.txt | 72 ++++++++++++ libc/src/math/remainder.cpp | 19 +++ libc/src/math/remainder.h | 18 +++ libc/src/math/remainderf.cpp | 19 +++ libc/src/math/remainderf.h | 18 +++ libc/src/math/remainderl.cpp | 19 +++ libc/src/math/remainderl.h | 18 +++ libc/src/math/remquo.cpp | 18 +++ libc/src/math/remquo.h | 18 +++ libc/src/math/remquof.cpp | 18 +++ libc/src/math/remquof.h | 18 +++ libc/src/math/remquol.cpp | 19 +++ libc/src/math/remquol.h | 18 +++ libc/test/src/math/CMakeLists.txt | 39 ++++++ libc/test/src/math/remquo_test.cpp | 91 ++++++++++++++ libc/test/src/math/remquof_test.cpp | 91 ++++++++++++++ libc/test/src/math/remquol_test.cpp | 97 +++++++++++++++ libc/utils/FPUtil/CMakeLists.txt | 1 + .../FPUtil/DivisionAndRemainderOperations.h | 111 ++++++++++++++++++ libc/utils/FPUtil/FPBits.h | 8 ++ libc/utils/FPUtil/LongDoubleBitsX86.h | 9 ++ 24 files changed, 759 insertions(+) create mode 100644 libc/src/math/remainder.cpp create mode 100644 libc/src/math/remainder.h create mode 100644 libc/src/math/remainderf.cpp create mode 100644 libc/src/math/remainderf.h create mode 100644 libc/src/math/remainderl.cpp create mode 100644 libc/src/math/remainderl.h create mode 100644 libc/src/math/remquo.cpp create mode 100644 libc/src/math/remquo.h create mode 100644 libc/src/math/remquof.cpp create mode 100644 libc/src/math/remquof.h create mode 100644 libc/src/math/remquol.cpp create mode 100644 libc/src/math/remquol.h create mode 100644 libc/test/src/math/remquo_test.cpp create mode 100644 libc/test/src/math/remquof_test.cpp create mode 100644 libc/test/src/math/remquol_test.cpp create mode 100644 libc/utils/FPUtil/DivisionAndRemainderOperations.h diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 063fe401da8b5..33ae64c0a08cb 100644 --- 
a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -199,6 +199,12 @@ def MathAPI : PublicAPI<"math.h"> { "modfl", "expf", "exp2f", + "remainderf", + "remainder", + "remainderl", + "remquof", + "remquo", + "remquol", "round", "roundf", "roundl", diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index c24173b1d0e77..6aca5e400d68a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -103,6 +103,12 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.modf libc.src.math.modff libc.src.math.modfl + libc.src.math.remainderf + libc.src.math.remainder + libc.src.math.remainderl + libc.src.math.remquof + libc.src.math.remquo + libc.src.math.remquol libc.src.math.round libc.src.math.roundf libc.src.math.roundl diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 15fc12d375e63..77fa971adc614 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -310,6 +310,14 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"expf", RetValSpec, [ArgSpec]>, FunctionSpec<"exp2f", RetValSpec, [ArgSpec]>, + FunctionSpec<"remainderf", RetValSpec, [ArgSpec, ArgSpec]>, + FunctionSpec<"remainder", RetValSpec, [ArgSpec, ArgSpec]>, + FunctionSpec<"remainderl", RetValSpec, [ArgSpec, ArgSpec]>, + + FunctionSpec<"remquof", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, + FunctionSpec<"remquo", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, + FunctionSpec<"remquol", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, + FunctionSpec<"round", RetValSpec, [ArgSpec]>, FunctionSpec<"roundf", RetValSpec, [ArgSpec]>, FunctionSpec<"roundl", RetValSpec, [ArgSpec]>, diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 0c878de2ac95d..3b4f821726576 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -521,3 +521,75 @@ add_entrypoint_object( COMPILE_OPTIONS -O2 ) + +add_entrypoint_object( + remquof + SRCS + remquof.cpp + HDRS + remquof.h + DEPENDS + libc.utils.FPUtil.fputil + 
COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + remquo + SRCS + remquo.cpp + HDRS + remquo.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + remquol + SRCS + remquol.cpp + HDRS + remquol.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + remainderf + SRCS + remainderf.cpp + HDRS + remainderf.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + remainder + SRCS + remainder.cpp + HDRS + remainder.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + remainderl + SRCS + remainderl.cpp + HDRS + remainderl.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O2 +) diff --git a/libc/src/math/remainder.cpp b/libc/src/math/remainder.cpp new file mode 100644 index 0000000000000..880e6a60f4a1d --- /dev/null +++ b/libc/src/math/remainder.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of remainder function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +double LLVM_LIBC_ENTRYPOINT(remainder)(double x, double y) { + int quotient; + return fputil::remquo(x, y, quotient); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remainder.h b/libc/src/math/remainder.h new file mode 100644 index 0000000000000..8a720fc23b6be --- /dev/null +++ b/libc/src/math/remainder.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remainder ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMAINDER_H +#define LLVM_LIBC_SRC_MATH_REMAINDER_H + +namespace __llvm_libc { + +double remainder(double x, double y); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMAINDER_H diff --git a/libc/src/math/remainderf.cpp b/libc/src/math/remainderf.cpp new file mode 100644 index 0000000000000..bab320101d58c --- /dev/null +++ b/libc/src/math/remainderf.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of remainderf function -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +float LLVM_LIBC_ENTRYPOINT(remainderf)(float x, float y) { + int quotient; + return fputil::remquo(x, y, quotient); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remainderf.h b/libc/src/math/remainderf.h new file mode 100644 index 0000000000000..19a16d08a94d3 --- /dev/null +++ b/libc/src/math/remainderf.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remainderf --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMAINDERF_H +#define LLVM_LIBC_SRC_MATH_REMAINDERF_H + +namespace __llvm_libc { + +float remainderf(float x, float y); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMAINDERF_H diff --git a/libc/src/math/remainderl.cpp b/libc/src/math/remainderl.cpp new file mode 100644 index 0000000000000..bd9bc4985d967 --- /dev/null +++ b/libc/src/math/remainderl.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of remainderl function -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +long double LLVM_LIBC_ENTRYPOINT(remainderl)(long double x, long double y) { + int quotient; + return fputil::remquo(x, y, quotient); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remainderl.h b/libc/src/math/remainderl.h new file mode 100644 index 0000000000000..f2837635ab77c --- /dev/null +++ b/libc/src/math/remainderl.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remainderl --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMAINDERL_H +#define LLVM_LIBC_SRC_MATH_REMAINDERL_H + +namespace __llvm_libc { + +long double remainderl(long double x, long double y); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMAINDERL_H diff --git a/libc/src/math/remquo.cpp b/libc/src/math/remquo.cpp new file mode 100644 index 0000000000000..b61d7d4d1bed5 --- /dev/null +++ b/libc/src/math/remquo.cpp @@ -0,0 +1,18 @@ +//===-- Implementation of remquo function ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +double LLVM_LIBC_ENTRYPOINT(remquo)(double x, double y, int *exp) { + return fputil::remquo(x, y, *exp); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remquo.h b/libc/src/math/remquo.h new file mode 100644 index 0000000000000..cb753fee6ea0d --- /dev/null +++ b/libc/src/math/remquo.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remquo ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMQUO_H +#define LLVM_LIBC_SRC_MATH_REMQUO_H + +namespace __llvm_libc { + +double remquo(double x, double y, int *exp); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMQUO_H diff --git a/libc/src/math/remquof.cpp b/libc/src/math/remquof.cpp new file mode 100644 index 0000000000000..246bee038f116 --- /dev/null +++ b/libc/src/math/remquof.cpp @@ -0,0 +1,18 @@ +//===-- Implementation of remquof function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +float LLVM_LIBC_ENTRYPOINT(remquof)(float x, float y, int *exp) { + return fputil::remquo(x, y, *exp); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remquof.h b/libc/src/math/remquof.h new file mode 100644 index 0000000000000..feb2e4f5e0dd5 --- /dev/null +++ b/libc/src/math/remquof.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remquof -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMQUOF_H +#define LLVM_LIBC_SRC_MATH_REMQUOF_H + +namespace __llvm_libc { + +float remquof(float x, float y, int *exp); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMQUOF_H diff --git a/libc/src/math/remquol.cpp b/libc/src/math/remquol.cpp new file mode 100644 index 0000000000000..8e0287682dbfe --- /dev/null +++ b/libc/src/math/remquol.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of remquol function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/common.h" +#include "utils/FPUtil/DivisionAndRemainderOperations.h" + +namespace __llvm_libc { + +long double LLVM_LIBC_ENTRYPOINT(remquol)(long double x, long double y, + int *exp) { + return fputil::remquo(x, y, *exp); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/remquol.h b/libc/src/math/remquol.h new file mode 100644 index 0000000000000..d1b0e20fcc865 --- /dev/null +++ b/libc/src/math/remquol.h @@ -0,0 +1,18 @@ +//===-- Implementation header for remquol -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_REMQUOL_H +#define LLVM_LIBC_SRC_MATH_REMQUOL_H + +namespace __llvm_libc { + +long double remquol(long double x, long double y, int *exp); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_REMQUOL_H diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 07b5052074528..e1bac1a339067 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -552,3 +552,42 @@ add_fp_unittest( libc.src.math.sqrtl libc.utils.FPUtil.fputil ) + +add_fp_unittest( + remquof_test + NEED_MPFR + SUITE + libc_math_unittests + SRCS + remquof_test.cpp + DEPENDS + libc.include.math + libc.src.math.remquof + libc.utils.FPUtil.fputil +) + +add_fp_unittest( + remquo_test + NEED_MPFR + SUITE + libc_math_unittests + SRCS + remquo_test.cpp + DEPENDS + libc.include.math + libc.src.math.remquo + libc.utils.FPUtil.fputil +) + +add_fp_unittest( + remquol_test + NEED_MPFR + SUITE + libc_math_unittests + SRCS + remquol_test.cpp + DEPENDS + libc.include.math + libc.src.math.remquol + libc.utils.FPUtil.fputil +) diff --git a/libc/test/src/math/remquo_test.cpp b/libc/test/src/math/remquo_test.cpp new file mode 100644 index 0000000000000..4ea61dddf26c6 --- /dev/null +++ b/libc/test/src/math/remquo_test.cpp @@ -0,0 +1,91 @@ +//===-- Unittests for remquo ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/math.h" +#include "src/math/remquo.h" +#include "utils/FPUtil/BasicOperations.h" +#include "utils/FPUtil/FPBits.h" +#include "utils/FPUtil/TestHelpers.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/Test.h" + +using FPBits = __llvm_libc::fputil::FPBits; +using UIntType = FPBits::UIntType; + +namespace mpfr = __llvm_libc::testing::mpfr; + +static const float zero = FPBits::zero(); +static const float negZero = FPBits::negZero(); +static const float nan = FPBits::buildNaN(1); +static const float inf = FPBits::inf(); +static const float negInf = FPBits::negInf(); + +TEST(RemquoTest, SpecialNumbers) { + int exponent; + double x, y; + + y = 1.0; + x = inf; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + x = negInf; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + + x = 1.0; + y = zero; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + y = negZero; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + + y = nan; + x = 1.0; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + + y = 1.0; + x = nan; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + + x = nan; + y = nan; + EXPECT_NE(isnan(__llvm_libc::remquo(x, y, &exponent)), 0); + + x = zero; + y = 1.0; + EXPECT_FP_EQ(__llvm_libc::remquo(x, y, &exponent), zero); + + x = negZero; + y = 1.0; + EXPECT_FP_EQ(__llvm_libc::remquo(x, y, &exponent), negZero); +} + +TEST(RemquoTest, SubnormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = + (FPBits::maxSubnormal - FPBits::minSubnormal) / count; + for (UIntType v = FPBits::minSubnormal, w = FPBits::maxSubnormal; + v <= FPBits::maxSubnormal && w >= FPBits::minSubnormal; + v += step, w -= step) { + double x = FPBits(v), y = FPBits(w); + mpfr::BinaryOutput result; + mpfr::BinaryInput input{x, y}; + 
result.f = __llvm_libc::remquo(x, y, &result.i); + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } +} + +TEST(RemquoTest, NormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = (FPBits::maxNormal - FPBits::minNormal) / count; + for (UIntType v = FPBits::minNormal, w = FPBits::maxNormal; + v <= FPBits::maxNormal && w >= FPBits::minNormal; v += step, w -= step) { + double x = FPBits(v), y = FPBits(w); + mpfr::BinaryOutput result; + mpfr::BinaryInput input{x, y}; + result.f = __llvm_libc::remquo(x, y, &result.i); + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } +} diff --git a/libc/test/src/math/remquof_test.cpp b/libc/test/src/math/remquof_test.cpp new file mode 100644 index 0000000000000..0c51d5f5324df --- /dev/null +++ b/libc/test/src/math/remquof_test.cpp @@ -0,0 +1,91 @@ +//===-- Unittests for remquof ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/math.h" +#include "src/math/remquof.h" +#include "utils/FPUtil/BasicOperations.h" +#include "utils/FPUtil/FPBits.h" +#include "utils/FPUtil/TestHelpers.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/Test.h" + +using FPBits = __llvm_libc::fputil::FPBits; +using UIntType = FPBits::UIntType; + +namespace mpfr = __llvm_libc::testing::mpfr; + +static const float zero = FPBits::zero(); +static const float negZero = FPBits::negZero(); +static const float nan = FPBits::buildNaN(1); +static const float inf = FPBits::inf(); +static const float negInf = FPBits::negInf(); + +TEST(RemquofTest, SpecialNumbers) { + int exponent; + float x, y; + + y = 1.0f; + x = inf; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + x = negInf; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + + x = 1.0f; + y = zero; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + y = negZero; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + + y = nan; + x = 1.0f; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + + y = 1.0f; + x = nan; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + + x = nan; + y = nan; + EXPECT_NE(isnan(__llvm_libc::remquof(x, y, &exponent)), 0); + + x = zero; + y = 1.0f; + EXPECT_FP_EQ(__llvm_libc::remquof(x, y, &exponent), zero); + + x = negZero; + y = 1.0f; + EXPECT_FP_EQ(__llvm_libc::remquof(x, y, &exponent), negZero); +} + +TEST(RemquofTest, SubnormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = + (FPBits::maxSubnormal - FPBits::minSubnormal) / count; + for (UIntType v = FPBits::minSubnormal, w = FPBits::maxSubnormal; + v <= FPBits::maxSubnormal && w >= FPBits::minSubnormal; + v += step, w -= step) { + float x = FPBits(v), y = FPBits(w); + mpfr::BinaryOutput result; + mpfr::BinaryInput 
input{x, y}; + result.f = __llvm_libc::remquof(x, y, &result.i); + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } +} + +TEST(RemquofTest, NormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = (FPBits::maxNormal - FPBits::minNormal) / count; + for (UIntType v = FPBits::minNormal, w = FPBits::maxNormal; + v <= FPBits::maxNormal && w >= FPBits::minNormal; v += step, w -= step) { + float x = FPBits(v), y = FPBits(w); + mpfr::BinaryOutput result; + mpfr::BinaryInput input{x, y}; + result.f = __llvm_libc::remquof(x, y, &result.i); + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } +} diff --git a/libc/test/src/math/remquol_test.cpp b/libc/test/src/math/remquol_test.cpp new file mode 100644 index 0000000000000..eab3a5fb1fa6a --- /dev/null +++ b/libc/test/src/math/remquol_test.cpp @@ -0,0 +1,97 @@ +//===-- Unittests for remquol ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/math.h" +#include "src/math/remquol.h" +#include "utils/FPUtil/BasicOperations.h" +#include "utils/FPUtil/FPBits.h" +#include "utils/FPUtil/TestHelpers.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/Test.h" + +using FPBits = __llvm_libc::fputil::FPBits; +using UIntType = FPBits::UIntType; + +namespace mpfr = __llvm_libc::testing::mpfr; + +static const long double zero = FPBits::zero(); +static const long double negZero = FPBits::negZero(); +static const long double nan = FPBits::buildNaN(1); +static const long double inf = FPBits::inf(); +static const long double negInf = FPBits::negInf(); + +TEST(RemquoTest, SpecialNumbers) { + int exponent; + long double x, y; + + y = 1.0l; + x = inf; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + x = negInf; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + + x = 1.0l; + y = zero; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + y = negZero; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + + y = nan; + x = 1.0l; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + + y = 1.0l; + x = nan; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + + x = nan; + y = nan; + EXPECT_NE(isnan(__llvm_libc::remquol(x, y, &exponent)), 0); + + x = zero; + y = 1.0l; + EXPECT_FP_EQ(__llvm_libc::remquol(x, y, &exponent), zero); + + x = negZero; + y = 1.0l; + EXPECT_FP_EQ(__llvm_libc::remquol(x, y, &exponent), negZero); +} + +TEST(RemquofTest, SubnormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = + (FPBits::maxSubnormal - FPBits::minSubnormal) / count; + for (UIntType v = FPBits::minSubnormal, w = FPBits::maxSubnormal; + v <= FPBits::maxSubnormal && w >= FPBits::minSubnormal; + v += step, w -= step) { + long double x = FPBits(v), y = FPBits(w); + 
mpfr::BinaryOutput result; + mpfr::BinaryInput input{x, y}; + result.f = __llvm_libc::remquol(x, y, &result.i); + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } +} + +TEST(RemquofTest, NormalRange) { + constexpr UIntType count = 1000001; + constexpr UIntType step = (FPBits::maxNormal - FPBits::minNormal) / count; + for (UIntType v = FPBits::minNormal, w = FPBits::maxNormal; + v <= FPBits::maxNormal && w >= FPBits::minNormal; v += step, w -= step) { + long double x = FPBits(v), y = FPBits(w); + mpfr::BinaryOutput result; + result.f = __llvm_libc::remquol(x, y, &result.i); + // In normal range on x86 platforms, the implicit 1 bit can be zero making + // the numbers NaN. Hence we test for them separately. + if (isnan(x) || isnan(y)) { + ASSERT_NE(isnan(result.f), 0); + } else { + mpfr::BinaryInput input{x, y}; + ASSERT_MPFR_MATCH(mpfr::Operation::RemQuo, input, result, 0.0); + } + } +} diff --git a/libc/utils/FPUtil/CMakeLists.txt b/libc/utils/FPUtil/CMakeLists.txt index 745ede30fedd6..8a6cc36a7b516 100644 --- a/libc/utils/FPUtil/CMakeLists.txt +++ b/libc/utils/FPUtil/CMakeLists.txt @@ -11,6 +11,7 @@ add_header_library( BasicOperations.h BitPatterns.h ClassificationFunctions.h + DivisionAndRemainderOperations.h FloatOperations.h FloatProperties.h FPBits.h diff --git a/libc/utils/FPUtil/DivisionAndRemainderOperations.h b/libc/utils/FPUtil/DivisionAndRemainderOperations.h new file mode 100644 index 0000000000000..ceae538027f3c --- /dev/null +++ b/libc/utils/FPUtil/DivisionAndRemainderOperations.h @@ -0,0 +1,111 @@ +//===-- Floating point divsion and remainder operations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_UTILS_FPUTIL_DIVISION_AND_REMAINDER_OPERATIONS_H +#define LLVM_LIBC_UTILS_FPUTIL_DIVISION_AND_REMAINDER_OPERATIONS_H + +#include "FPBits.h" +#include "ManipulationFunctions.h" +#include "NormalFloat.h" + +#include "utils/CPP/TypeTraits.h" + +namespace __llvm_libc { +namespace fputil { + +static constexpr int quotientLSBBits = 3; + +// The implementation is a bit-by-bit algorithm which uses integer division +// to evaluate the quotient and remainder. +template ::Value, int> = 0> +static inline T remquo(T x, T y, int &q) { + FPBits xbits(x), ybits(y); + if (xbits.isNaN()) + return x; + if (ybits.isNaN()) + return y; + if (xbits.isInf() || ybits.isZero()) + return FPBits::buildNaN(1); + + if (xbits.isZero() || ybits.isInf()) { + q = 0; + return __llvm_libc::fputil::copysign(T(0.0), x); + } + + bool resultSign = (xbits.sign == ybits.sign ? false : true); + + // Once we know the sign of the result, we can just operate on the absolute + // values. The correct sign can be applied to the result after the result + // is evaluated. + xbits.sign = ybits.sign = 0; + + NormalFloat normalx(xbits), normaly(ybits); + int exp = normalx.exponent - normaly.exponent; + typename NormalFloat::UIntType mx = normalx.mantissa, + my = normaly.mantissa; + + q = 0; + while (exp >= 0) { + unsigned shiftCount = 0; + typename NormalFloat::UIntType n = mx; + for (shiftCount = 0; n < my; n <<= 1, ++shiftCount) + ; + + if (static_cast(shiftCount) > exp) + break; + + exp -= shiftCount; + if (0 <= exp && exp < quotientLSBBits) + q |= (1 << exp); + + mx = n - my; + if (mx == 0) + return __llvm_libc::fputil::copysign(T(0.0), x); + } + + NormalFloat remainder(exp + normaly.exponent, mx, 0); + + // Since NormalFloat to native type conversion is a truncation operation + // currently, the remainder value in the native type is correct as is. 
+ // However, if NormalFloat to native type conversion is updated in future, + // then the conversion to native remainder value should be updated + // appropriately and some directed tests added. + T nativeRemainder(remainder); + T absy = T(ybits); + int cmp = remainder.mul2(1).cmp(normaly); + if (cmp > 0) { + q = q + 1; + if (x >= T(0.0)) + nativeRemainder = nativeRemainder - absy; + else + nativeRemainder = absy - nativeRemainder; + } else if (cmp == 0) { + if (q & 1) { + q += 1; + if (x >= T(0.0)) + nativeRemainder = -nativeRemainder; + } else { + if (x < T(0.0)) + nativeRemainder = -nativeRemainder; + } + } else { + if (x < T(0.0)) + nativeRemainder = -nativeRemainder; + } + + q = resultSign ? -q : q; + if (nativeRemainder == T(0.0)) + return __llvm_libc::fputil::copysign(T(0.0), x); + return nativeRemainder; +} + +} // namespace fputil +} // namespace __llvm_libc + +#endif // LLVM_LIBC_UTILS_FPUTIL_DIVISION_AND_REMAINDER_OPERATIONS_H diff --git a/libc/utils/FPUtil/FPBits.h b/libc/utils/FPUtil/FPBits.h index 2c630dba2af76..89bdd92669b81 100644 --- a/libc/utils/FPUtil/FPBits.h +++ b/libc/utils/FPUtil/FPBits.h @@ -73,6 +73,14 @@ template struct __attribute__((packed)) FPBits { static constexpr int exponentBias = (1 << (ExponentWidth::value - 1)) - 1; static constexpr int maxExponent = (1 << ExponentWidth::value) - 1; + static constexpr UIntType minSubnormal = UIntType(1); + static constexpr UIntType maxSubnormal = + (UIntType(1) << MantissaWidth::value) - 1; + static constexpr UIntType minNormal = + (UIntType(1) << MantissaWidth::value); + static constexpr UIntType maxNormal = + ((UIntType(maxExponent) - 1) << MantissaWidth::value) | maxSubnormal; + // We don't want accidental type promotions/conversions so we require exact // type match. 
template struct __attribute__((packed)) FPBits { static constexpr int exponentBias = 0x3FFF; static constexpr int maxExponent = 0x7FFF; + static constexpr UIntType minSubnormal = UIntType(1); + // Subnormal numbers include the implicit bit in x86 long double formats. + static constexpr UIntType maxSubnormal = + (UIntType(1) << (MantissaWidth::value + 1)) - 1; + static constexpr UIntType minNormal = + (UIntType(3) << MantissaWidth::value); + static constexpr UIntType maxNormal = + ((UIntType(maxExponent) - 1) << (MantissaWidth::value + 1)) | + (UIntType(1) << MantissaWidth::value) | maxSubnormal; UIntType mantissa : MantissaWidth::value; uint8_t implicitBit : 1; From 23bcfbcc9866b78c7229cc7afb087d0f5d3e8b2f Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 4 Sep 2020 05:24:50 +0000 Subject: [PATCH 167/465] Add comment to describe a field member (NFC) Address post-review comment. --- mlir/include/mlir/Pass/PassManager.h | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/include/mlir/Pass/PassManager.h b/mlir/include/mlir/Pass/PassManager.h index ec88485cd3efa..9aace79f2053f 100644 --- a/mlir/include/mlir/Pass/PassManager.h +++ b/mlir/include/mlir/Pass/PassManager.h @@ -308,6 +308,7 @@ class PassManager : public OpPassManager { runWithCrashRecovery(MutableArrayRef> passes, ModuleOp module, AnalysisManager am); + /// Context this PassManager was initialized with. MLIRContext *context; /// Flag that specifies if pass statistics should be dumped. From 17ac4543d427c5981e7f0313cba180b1d6c90c50 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Thu, 3 Sep 2020 22:25:14 -0700 Subject: [PATCH 168/465] [libc][obvious] Use constants of type double in remquo_test. 
--- libc/test/src/math/remquo_test.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libc/test/src/math/remquo_test.cpp b/libc/test/src/math/remquo_test.cpp index 4ea61dddf26c6..0ebbed7224b22 100644 --- a/libc/test/src/math/remquo_test.cpp +++ b/libc/test/src/math/remquo_test.cpp @@ -19,11 +19,11 @@ using UIntType = FPBits::UIntType; namespace mpfr = __llvm_libc::testing::mpfr; -static const float zero = FPBits::zero(); -static const float negZero = FPBits::negZero(); -static const float nan = FPBits::buildNaN(1); -static const float inf = FPBits::inf(); -static const float negInf = FPBits::negInf(); +static const double zero = FPBits::zero(); +static const double negZero = FPBits::negZero(); +static const double nan = FPBits::buildNaN(1); +static const double inf = FPBits::inf(); +static const double negInf = FPBits::negInf(); TEST(RemquoTest, SpecialNumbers) { int exponent; From a9c0bf04043462d43013bc5616aa48f6d3e16b88 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 4 Sep 2020 00:17:34 -0700 Subject: [PATCH 169/465] [NFC][Asan] Inline enum doc strings --- compiler-rt/lib/asan/asan_allocator.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index c7c9d7a7b3ce4..cb9f49f73a32c 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -101,16 +101,14 @@ static const uptr kChunkHeader2Size = sizeof(ChunkBase) - kChunkHeaderSize; COMPILER_CHECK(kChunkHeaderSize == 16); COMPILER_CHECK(kChunkHeader2Size <= 16); -// Every chunk of memory allocated by this allocator can be in one of 3 states: -// CHUNK_AVAILABLE: the chunk is in the free list and ready to be allocated. -// CHUNK_ALLOCATED: the chunk is allocated and not yet freed. -// CHUNK_QUARANTINE: the chunk was freed and put into quarantine zone. 
enum { // Either just allocated by underlying allocator, but AsanChunk is not yet // ready, or almost returned to undelying allocator and AsanChunk is already // meaningless. CHUNK_INVALID = 0, + // The chunk is allocated and not yet freed. CHUNK_ALLOCATED = 2, + // The chunk was freed and put into quarantine zone. CHUNK_QUARANTINE = 3 }; From bdc4c0bc5c5e522aa770363fa6f50a3d5a5eadc2 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Fri, 4 Sep 2020 09:30:12 +0200 Subject: [PATCH 170/465] Revert "[lldb] avoid assert in threadsanitizer tests on linux" This reverts commit f369d51896e1c0f61df253b116c42771479549df. The bug this fixes was already fixed by 1c5a0cb1c3bffdae0d73acf8a23e31646b35c596 with the same approach and this commit is now just giving the variable a second fallback value. --- .../InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp index 68e732538158a..a2954f556b103 100644 --- a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp +++ b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp @@ -855,8 +855,6 @@ bool InstrumentationRuntimeTSan::NotifyBreakpointHit( }); report->GetAsDictionary()->AddBooleanItem("all_addresses_are_same", all_addresses_are_same); - } else { - stop_reason_description = "unknown ThreadSanitizer stop reason"; } // Make sure this is the right process From aec9e20a3e9a4f25a5b1e07816c95f970300d918 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Fri, 4 Sep 2020 10:00:09 +0200 Subject: [PATCH 171/465] [mlir] introduce type constraints for operands of LLVM dialect operations Historically, the operations in the MLIR's LLVM dialect only checked that the operand are of LLVM dialect type without more detailed constraints. 
This was due to LLVM dialect types wrapping LLVM IR types and having clunky verification methods. With the new first-class modeling, it is possible to define type constraints similarly to other dialects and use them to enforce some correctness rules in verifiers instead of having LLVM assert during translation to LLVM IR. This hardening discovered several issues where MLIR was producing LLVM dialect operations that cannot exist in LLVM IR. Depends On D85900 Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D85901 --- mlir/include/mlir/Dialect/GPU/GPUOps.td | 3 +- .../include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 126 ++++++++++--- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 171 +++++++++++------- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 2 - mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 3 +- mlir/test/Dialect/LLVMIR/invalid.mlir | 4 +- 6 files changed, 215 insertions(+), 94 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index 288031c598ff4..0ae6267cb67cb 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -21,7 +21,8 @@ include "mlir/Interfaces/SideEffectInterfaces.td" // Type constraint accepting standard integers, indices and wrapped LLVM integer // types. 
def IntLikeOrLLVMInt : TypeConstraint< - Or<[AnySignlessInteger.predicate, Index.predicate, LLVMInt.predicate]>, + Or<[AnySignlessInteger.predicate, Index.predicate, + LLVM_AnyInteger.predicate]>, "integer, index or LLVM dialect equivalent">; //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index 1f0eb6aab58a1..10755a436115f 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -17,6 +17,10 @@ include "mlir/IR/OpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" +//===----------------------------------------------------------------------===// +// LLVM Dialect. +//===----------------------------------------------------------------------===// + def LLVM_Dialect : Dialect { let name = "llvm"; let cppNamespace = "LLVM"; @@ -38,34 +42,108 @@ def LLVM_Dialect : Dialect { }]; } -// LLVM IR type wrapped in MLIR. +//===----------------------------------------------------------------------===// +// LLVM dialect type constraints. +//===----------------------------------------------------------------------===// + +// LLVM dialect type. def LLVM_Type : DialectType()">, "LLVM dialect type">; -// Type constraint accepting only wrapped LLVM integer types. -def LLVMInt : TypeConstraint< - And<[LLVM_Type.predicate, - CPred<"$_self.cast<::mlir::LLVM::LLVMType>().isIntegerTy()">]>, - "LLVM dialect integer">; +// Type constraint accepting LLVM integer types. +def LLVM_AnyInteger : Type< + CPred<"$_self.isa<::mlir::LLVM::LLVMIntegerType>()">, + "LLVM integer type">; + +// Type constraints accepting LLVM integer type of a specific width. 
+class LLVM_IntBase : + Type().getBitWidth() == " + # width>]>, + "LLVM " # width # "-bit integer type">, + BuildableType< + "::mlir::LLVM::LLVMIntegerType::get($_builder.getContext(), " + # width # ")">; + +def LLVM_i1 : LLVM_IntBase<1>; +def LLVM_i8 : LLVM_IntBase<8>; +def LLVM_i32 : LLVM_IntBase<32>; -def LLVMIntBase : TypeConstraint< +// Type constraint accepting LLVM primitive types, i.e. all types except void +// and function. +def LLVM_PrimitiveType : Type< And<[LLVM_Type.predicate, - CPred<"$_self.cast<::mlir::LLVM::LLVMType>().isIntegerTy()">]>, - "LLVM dialect integer">; - -// Integer type of a specific width. -class LLVMI - : Type().isIntegerTy(" # width # ")">]>, - "LLVM dialect " # width # "-bit integer">, - BuildableType< - "::mlir::LLVM::LLVMType::getIntNTy($_builder.getContext()," - # width # ")">; - -def LLVMI1 : LLVMI<1>; + CPred<"!$_self.isa<::mlir::LLVM::LLVMVoidType, " + "::mlir::LLVM::LLVMFunctionType>()">]>, + "primitive LLVM type">; + +// Type constraint accepting any LLVM floating point type. +def LLVM_AnyFloat : Type< + CPred<"$_self.isa<::mlir::LLVM::LLVMBFloatType, " + "::mlir::LLVM::LLVMHalfType, " + "::mlir::LLVM::LLVMFloatType, " + "::mlir::LLVM::LLVMDoubleType>()">, + "floating point LLVM type">; + +// Type constraint accepting any LLVM pointer type. +def LLVM_AnyPointer : Type()">, + "LLVM pointer type">; + +// Type constraint accepting LLVM pointer type with an additional constraint +// on the element type. +class LLVM_PointerTo : Type< + And<[LLVM_AnyPointer.predicate, + SubstLeaves< + "$_self", + "$_self.cast<::mlir::LLVM::LLVMPointerType>().getElementType()", + pointee.predicate>]>, + "LLVM pointer to " # pointee.description>; + +// Type constraint accepting any LLVM structure type. +def LLVM_AnyStruct : Type()">, + "LLVM structure type">; + +// Type constraint accepting opaque LLVM structure type. 
+def LLVM_OpaqueStruct : Type< + And<[LLVM_AnyStruct.predicate, + CPred<"$_self.cast<::mlir::LLVM::LLVMStructType>().isOpaque()">]>>; + +// Type constraint accepting any LLVM type that can be loaded or stored, i.e. a +// type that has size (not void, function or opaque struct type). +def LLVM_LoadableType : Type< + And<[LLVM_PrimitiveType.predicate, Neg]>, + "LLVM type with size">; + +// Type constraint accepting any LLVM aggregate type, i.e. structure or array. +def LLVM_AnyAggregate : Type< + CPred<"$_self.isa<::mlir::LLVM::LLVMStructType, " + "::mlir::LLVM::LLVMArrayType>()">, + "LLVM aggregate type">; + +// Type constraint accepting any LLVM non-aggregate type, i.e. not structure or +// array. +def LLVM_AnyNonAggregate : Type, + "LLVM non-aggregate type">; + +// Type constraint accepting any LLVM vector type. +def LLVM_AnyVector : Type()">, + "LLVM vector type">; + +// Type constraint accepting an LLVM vector type with an additional constraint +// on the vector element type. +class LLVM_VectorOf : Type< + And<[LLVM_AnyVector.predicate, + SubstLeaves< + "$_self", + "$_self.cast<::mlir::LLVM::LLVMVectorType>().getElementType()", + element.predicate>]>, + "LLVM vector of " # element.description>; + +// Type constraint accepting a constrained type, or a vector of such types. +class LLVM_ScalarOrVectorOf : + AnyTypeOf<[element, LLVM_VectorOf]>; // Base class for LLVM operations. Defines the interface to the llvm::IRBuilder // used to translate to LLVM IR proper. @@ -85,6 +163,10 @@ class LLVM_OpBase traits = []> : string llvmBuilder = ""; } +//===----------------------------------------------------------------------===// +// Base classes for LLVM dialect operations. +//===----------------------------------------------------------------------===// + // Base class for LLVM operations. All operations get an "llvm." prefix in // their name automatically. 
LLVM operations have either zero or one result, // this class is specialized below for both cases and should not be used diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index b1dd7b1af0301..b5bf4ac779727 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -87,39 +87,50 @@ class LLVM_TerminatorOp traits = []> : LLVM_Op; // Class for arithmetic binary operations. -class LLVM_ArithmeticOp traits = []> : +class LLVM_ArithmeticOpBase traits = []> : LLVM_OneResultOp, - Arguments<(ins LLVM_Type:$lhs, LLVM_Type:$rhs)>, + Arguments<(ins LLVM_ScalarOrVectorOf:$lhs, + LLVM_ScalarOrVectorOf:$rhs)>, LLVM_Builder<"$res = builder." # builderFunc # "($lhs, $rhs);"> { - let parser = [{ return impl::parseOneResultSameOperandTypeOp(parser, result); }]; + let parser = + [{ return impl::parseOneResultSameOperandTypeOp(parser, result); }]; let printer = [{ mlir::impl::printOneResultOp(this->getOperation(), p); }]; } -class LLVM_UnaryArithmeticOp traits = []> : +class LLVM_IntArithmeticOp traits = []> : + LLVM_ArithmeticOpBase; +class LLVM_FloatArithmeticOp traits = []> : + LLVM_ArithmeticOpBase; + +// Class for arithmetic unary operations. +class LLVM_UnaryArithmeticOp traits = []> : LLVM_OneResultOp, - Arguments<(ins LLVM_Type:$operand)>, + Arguments<(ins type:$operand)>, LLVM_Builder<"$res = builder." # builderFunc # "($operand);"> { - let parser = [{ return impl::parseOneResultSameOperandTypeOp(parser, result); }]; + let parser = + [{ return impl::parseOneResultSameOperandTypeOp(parser, result); }]; let printer = [{ mlir::impl::printOneResultOp(this->getOperation(), p); }]; } // Integer binary operations. 
-def LLVM_AddOp : LLVM_ArithmeticOp<"add", "CreateAdd", [Commutative]>; -def LLVM_SubOp : LLVM_ArithmeticOp<"sub", "CreateSub">; -def LLVM_MulOp : LLVM_ArithmeticOp<"mul", "CreateMul", [Commutative]>; -def LLVM_UDivOp : LLVM_ArithmeticOp<"udiv", "CreateUDiv">; -def LLVM_SDivOp : LLVM_ArithmeticOp<"sdiv", "CreateSDiv">; -def LLVM_URemOp : LLVM_ArithmeticOp<"urem", "CreateURem">; -def LLVM_SRemOp : LLVM_ArithmeticOp<"srem", "CreateSRem">; -def LLVM_AndOp : LLVM_ArithmeticOp<"and", "CreateAnd">; -def LLVM_OrOp : LLVM_ArithmeticOp<"or", "CreateOr">; -def LLVM_XOrOp : LLVM_ArithmeticOp<"xor", "CreateXor">; -def LLVM_ShlOp : LLVM_ArithmeticOp<"shl", "CreateShl">; -def LLVM_LShrOp : LLVM_ArithmeticOp<"lshr", "CreateLShr">; -def LLVM_AShrOp : LLVM_ArithmeticOp<"ashr", "CreateAShr">; +def LLVM_AddOp : LLVM_IntArithmeticOp<"add", "CreateAdd", [Commutative]>; +def LLVM_SubOp : LLVM_IntArithmeticOp<"sub", "CreateSub">; +def LLVM_MulOp : LLVM_IntArithmeticOp<"mul", "CreateMul", [Commutative]>; +def LLVM_UDivOp : LLVM_IntArithmeticOp<"udiv", "CreateUDiv">; +def LLVM_SDivOp : LLVM_IntArithmeticOp<"sdiv", "CreateSDiv">; +def LLVM_URemOp : LLVM_IntArithmeticOp<"urem", "CreateURem">; +def LLVM_SRemOp : LLVM_IntArithmeticOp<"srem", "CreateSRem">; +def LLVM_AndOp : LLVM_IntArithmeticOp<"and", "CreateAnd">; +def LLVM_OrOp : LLVM_IntArithmeticOp<"or", "CreateOr">; +def LLVM_XOrOp : LLVM_IntArithmeticOp<"xor", "CreateXor">; +def LLVM_ShlOp : LLVM_IntArithmeticOp<"shl", "CreateShl">; +def LLVM_LShrOp : LLVM_IntArithmeticOp<"lshr", "CreateLShr">; +def LLVM_AShrOp : LLVM_IntArithmeticOp<"ashr", "CreateAShr">; // Predicate for integer comparisons. def ICmpPredicateEQ : I64EnumAttrCase<"eq", 0>; @@ -143,8 +154,9 @@ def ICmpPredicate : I64EnumAttr< // Other integer operations. 
def LLVM_ICmpOp : LLVM_OneResultOp<"icmp", [NoSideEffect]>, - Arguments<(ins ICmpPredicate:$predicate, LLVM_Type:$lhs, - LLVM_Type:$rhs)> { + Arguments<(ins ICmpPredicate:$predicate, + LLVM_ScalarOrVectorOf:$lhs, + LLVM_ScalarOrVectorOf:$rhs)> { let llvmBuilder = [{ $res = builder.CreateICmp(getLLVMCmpPredicate($predicate), $lhs, $rhs); }]; @@ -189,8 +201,9 @@ def FCmpPredicate : I64EnumAttr< // Other integer operations. def LLVM_FCmpOp : LLVM_OneResultOp<"fcmp", [NoSideEffect]>, - Arguments<(ins FCmpPredicate:$predicate, LLVM_Type:$lhs, - LLVM_Type:$rhs)> { + Arguments<(ins FCmpPredicate:$predicate, + LLVM_ScalarOrVectorOf:$lhs, + LLVM_ScalarOrVectorOf:$rhs)> { let llvmBuilder = [{ $res = builder.CreateFCmp(getLLVMCmpPredicate($predicate), $lhs, $rhs); }]; @@ -205,12 +218,13 @@ def LLVM_FCmpOp : LLVM_OneResultOp<"fcmp", [NoSideEffect]>, } // Floating point binary operations. -def LLVM_FAddOp : LLVM_ArithmeticOp<"fadd", "CreateFAdd">; -def LLVM_FSubOp : LLVM_ArithmeticOp<"fsub", "CreateFSub">; -def LLVM_FMulOp : LLVM_ArithmeticOp<"fmul", "CreateFMul">; -def LLVM_FDivOp : LLVM_ArithmeticOp<"fdiv", "CreateFDiv">; -def LLVM_FRemOp : LLVM_ArithmeticOp<"frem", "CreateFRem">; -def LLVM_FNegOp : LLVM_UnaryArithmeticOp<"fneg", "CreateFNeg">; +def LLVM_FAddOp : LLVM_FloatArithmeticOp<"fadd", "CreateFAdd">; +def LLVM_FSubOp : LLVM_FloatArithmeticOp<"fsub", "CreateFSub">; +def LLVM_FMulOp : LLVM_FloatArithmeticOp<"fmul", "CreateFMul">; +def LLVM_FDivOp : LLVM_FloatArithmeticOp<"fdiv", "CreateFDiv">; +def LLVM_FRemOp : LLVM_FloatArithmeticOp<"frem", "CreateFRem">; +def LLVM_FNegOp : LLVM_UnaryArithmeticOp, + "fneg", "CreateFNeg">; // Common code definition that is used to verify and set the alignment attribute // of LLVM ops that accept such an attribute. 
@@ -241,7 +255,8 @@ class MemoryOpWithAlignmentAndAttributes : MemoryOpWithAlignmentBase { def LLVM_AllocaOp : MemoryOpWithAlignmentBase, LLVM_OneResultOp<"alloca">, - Arguments<(ins LLVM_Type:$arraySize, OptionalAttr:$alignment)> { + Arguments<(ins LLVM_AnyInteger:$arraySize, + OptionalAttr:$alignment)> { string llvmBuilder = [{ auto *inst = builder.CreateAlloca( $_resultType->getPointerElementType(), $arraySize); @@ -259,8 +274,11 @@ def LLVM_AllocaOp : let parser = [{ return parseAllocaOp(parser, result); }]; let printer = [{ printAllocaOp(p, *this); }]; } + def LLVM_GEPOp : LLVM_OneResultOp<"getelementptr", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$base, Variadic:$indices)>, + Arguments<(ins LLVM_ScalarOrVectorOf:$base, + Variadic>:$indices)>, LLVM_Builder<"$res = builder.CreateGEP($base, $indices);"> { let assemblyFormat = [{ $base `[` $indices `]` attr-dict `:` functional-type(operands, results) @@ -269,7 +287,7 @@ def LLVM_GEPOp : LLVM_OneResultOp<"getelementptr", [NoSideEffect]>, def LLVM_LoadOp : MemoryOpWithAlignmentAndAttributes, LLVM_OneResultOp<"load">, - Arguments<(ins LLVM_Type:$addr, + Arguments<(ins LLVM_PointerTo:$addr, OptionalAttr:$alignment, UnitAttr:$volatile_, UnitAttr:$nontemporal)> { @@ -296,8 +314,8 @@ def LLVM_LoadOp : def LLVM_StoreOp : MemoryOpWithAlignmentAndAttributes, LLVM_ZeroResultOp<"store">, - Arguments<(ins LLVM_Type:$value, - LLVM_Type:$addr, + Arguments<(ins LLVM_LoadableType:$value, + LLVM_PointerTo:$addr, OptionalAttr:$alignment, UnitAttr:$volatile_, UnitAttr:$nontemporal)> { @@ -314,28 +332,41 @@ def LLVM_StoreOp : } // Casts. -class LLVM_CastOp traits = []> : LLVM_OneResultOp, - Arguments<(ins LLVM_Type:$arg)>, + Arguments<(ins type:$arg)>, LLVM_Builder<"$res = builder." 
# builderFunc # "($arg, $_resultType);"> { let parser = [{ return mlir::impl::parseCastOp(parser, result); }]; let printer = [{ mlir::impl::printCastOp(this->getOperation(), p); }]; } -def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "CreateBitCast">; -def LLVM_AddrSpaceCastOp : LLVM_CastOp<"addrspacecast", "CreateAddrSpaceCast">; -def LLVM_IntToPtrOp : LLVM_CastOp<"inttoptr", "CreateIntToPtr">; -def LLVM_PtrToIntOp : LLVM_CastOp<"ptrtoint", "CreatePtrToInt">; -def LLVM_SExtOp : LLVM_CastOp<"sext", "CreateSExt">; -def LLVM_ZExtOp : LLVM_CastOp<"zext", "CreateZExt">; -def LLVM_TruncOp : LLVM_CastOp<"trunc", "CreateTrunc">; -def LLVM_SIToFPOp : LLVM_CastOp<"sitofp", "CreateSIToFP">; -def LLVM_UIToFPOp : LLVM_CastOp<"uitofp", "CreateUIToFP">; -def LLVM_FPToSIOp : LLVM_CastOp<"fptosi", "CreateFPToSI">; -def LLVM_FPToUIOp : LLVM_CastOp<"fptoui", "CreateFPToUI">; -def LLVM_FPExtOp : LLVM_CastOp<"fpext", "CreateFPExt">; -def LLVM_FPTruncOp : LLVM_CastOp<"fptrunc", "CreateFPTrunc">; +def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "CreateBitCast", + LLVM_AnyNonAggregate>; +def LLVM_AddrSpaceCastOp : LLVM_CastOp<"addrspacecast", "CreateAddrSpaceCast", + LLVM_ScalarOrVectorOf>; +def LLVM_IntToPtrOp : LLVM_CastOp<"inttoptr", "CreateIntToPtr", + LLVM_ScalarOrVectorOf>; +def LLVM_PtrToIntOp : LLVM_CastOp<"ptrtoint", "CreatePtrToInt", + LLVM_ScalarOrVectorOf>; +def LLVM_SExtOp : LLVM_CastOp<"sext", "CreateSExt", + LLVM_ScalarOrVectorOf>; +def LLVM_ZExtOp : LLVM_CastOp<"zext", "CreateZExt", + LLVM_ScalarOrVectorOf>; +def LLVM_TruncOp : LLVM_CastOp<"trunc", "CreateTrunc", + LLVM_ScalarOrVectorOf>; +def LLVM_SIToFPOp : LLVM_CastOp<"sitofp", "CreateSIToFP", + LLVM_ScalarOrVectorOf>; +def LLVM_UIToFPOp : LLVM_CastOp<"uitofp", "CreateUIToFP", + LLVM_ScalarOrVectorOf>; +def LLVM_FPToSIOp : LLVM_CastOp<"fptosi", "CreateFPToSI", + LLVM_ScalarOrVectorOf>; +def LLVM_FPToUIOp : LLVM_CastOp<"fptoui", "CreateFPToUI", + LLVM_ScalarOrVectorOf>; +def LLVM_FPExtOp : LLVM_CastOp<"fpext", "CreateFPExt", + 
LLVM_ScalarOrVectorOf>; +def LLVM_FPTruncOp : LLVM_CastOp<"fptrunc", "CreateFPTrunc", + LLVM_ScalarOrVectorOf>; // Call-related operations. def LLVM_InvokeOp : LLVM_Op<"invoke", [ @@ -404,8 +435,8 @@ def LLVM_CallOp : LLVM_Op<"call">, let printer = [{ printCallOp(p, *this); }]; } def LLVM_ExtractElementOp : LLVM_OneResultOp<"extractelement", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$vector, - LLVM_Type:$position)> { + Arguments<(ins LLVM_AnyVector:$vector, + LLVM_AnyInteger:$position)> { string llvmBuilder = [{ $res = builder.CreateExtractElement($vector, $position); }]; @@ -416,8 +447,8 @@ def LLVM_ExtractElementOp : LLVM_OneResultOp<"extractelement", [NoSideEffect]>, let printer = [{ printExtractElementOp(p, *this); }]; } def LLVM_ExtractValueOp : LLVM_OneResultOp<"extractvalue", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$container, - ArrayAttr:$position)> { + Arguments<(ins LLVM_AnyAggregate:$container, + ArrayAttr:$position)> { string llvmBuilder = [{ $res = builder.CreateExtractValue($container, extractPosition($position)); }]; @@ -425,8 +456,9 @@ def LLVM_ExtractValueOp : LLVM_OneResultOp<"extractvalue", [NoSideEffect]>, let printer = [{ printExtractValueOp(p, *this); }]; } def LLVM_InsertElementOp : LLVM_OneResultOp<"insertelement", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$vector, LLVM_Type:$value, - LLVM_Type:$position)> { + Arguments<(ins LLVM_AnyVector:$vector, + LLVM_PrimitiveType:$value, + LLVM_AnyInteger:$position)> { string llvmBuilder = [{ $res = builder.CreateInsertElement($vector, $value, $position); }]; @@ -434,8 +466,9 @@ def LLVM_InsertElementOp : LLVM_OneResultOp<"insertelement", [NoSideEffect]>, let printer = [{ printInsertElementOp(p, *this); }]; } def LLVM_InsertValueOp : LLVM_OneResultOp<"insertvalue", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$container, LLVM_Type:$value, - ArrayAttr:$position)> { + Arguments<(ins LLVM_AnyAggregate:$container, + LLVM_PrimitiveType:$value, + ArrayAttr:$position)> { string llvmBuilder = [{ $res = 
builder.CreateInsertValue($container, $value, extractPosition($position)); @@ -451,7 +484,7 @@ def LLVM_InsertValueOp : LLVM_OneResultOp<"insertvalue", [NoSideEffect]>, } def LLVM_ShuffleVectorOp : LLVM_OneResultOp<"shufflevector", [NoSideEffect]>, - Arguments<(ins LLVM_Type:$v1, LLVM_Type:$v2, ArrayAttr:$mask)> { + Arguments<(ins LLVM_AnyVector:$v1, LLVM_AnyVector:$v2, ArrayAttr:$mask)> { string llvmBuilder = [{ SmallVector position = extractPosition($mask); SmallVector mask(position.begin(), position.end()); @@ -478,8 +511,9 @@ def LLVM_ShuffleVectorOp def LLVM_SelectOp : LLVM_OneResultOp<"select", [NoSideEffect, AllTypesMatch<["trueValue", "falseValue", "res"]>]>, - Arguments<(ins LLVM_Type:$condition, LLVM_Type:$trueValue, - LLVM_Type:$falseValue)>, + Arguments<(ins LLVM_ScalarOrVectorOf:$condition, + LLVM_Type:$trueValue, + LLVM_Type:$falseValue)>, LLVM_Builder< "$res = builder.CreateSelect($condition, $trueValue, $falseValue);"> { let builders = [OpBuilder< @@ -508,7 +542,7 @@ def LLVM_BrOp : LLVM_TerminatorOp<"br", def LLVM_CondBrOp : LLVM_TerminatorOp<"cond_br", [AttrSizedOperandSegments, DeclareOpInterfaceMethods, NoSideEffect]> { - let arguments = (ins LLVMI1:$condition, + let arguments = (ins LLVM_i1:$condition, Variadic:$trueDestOperands, Variadic:$falseDestOperands, OptionalAttr:$branch_weights); @@ -1090,9 +1124,11 @@ def AtomicOrdering : I64EnumAttr< let cppNamespace = "::mlir::LLVM"; } +def LLVM_AtomicRMWType : AnyTypeOf<[LLVM_AnyFloat, LLVM_AnyInteger]>; + def LLVM_AtomicRMWOp : LLVM_Op<"atomicrmw">, - Arguments<(ins AtomicBinOp:$bin_op, LLVM_Type:$ptr, LLVM_Type:$val, - AtomicOrdering:$ordering)>, + Arguments<(ins AtomicBinOp:$bin_op, LLVM_PointerTo:$ptr, + LLVM_AtomicRMWType:$val, AtomicOrdering:$ordering)>, Results<(outs LLVM_Type:$res)> { let llvmBuilder = [{ $res = builder.CreateAtomicRMW(getLLVMAtomicBinOp($bin_op), $ptr, $val, @@ -1103,8 +1139,11 @@ def LLVM_AtomicRMWOp : LLVM_Op<"atomicrmw">, let verifier = "return ::verify(*this);"; } +def 
LLVM_AtomicCmpXchgType : AnyTypeOf<[LLVM_AnyInteger, LLVM_AnyPointer]>; + def LLVM_AtomicCmpXchgOp : LLVM_Op<"cmpxchg">, - Arguments<(ins LLVM_Type:$ptr, LLVM_Type:$cmp, LLVM_Type:$val, + Arguments<(ins LLVM_PointerTo:$ptr, + LLVM_AtomicCmpXchgType:$cmp, LLVM_AtomicCmpXchgType:$val, AtomicOrdering:$success_ordering, AtomicOrdering:$failure_ordering)>, Results<(outs LLVM_Type:$res)> { diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 96d8459e5b3d4..63bd10c2e6f13 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -1533,8 +1533,6 @@ static ParseResult parseAtomicRMWOp(OpAsmParser &parser, static LogicalResult verify(AtomicRMWOp op) { auto ptrType = op.ptr().getType().cast(); - if (!ptrType.isPointerTy()) - return op.emitOpError("expected LLVM IR pointer type for operand #0"); auto valType = op.val().getType().cast(); if (valType != ptrType.getPointerElementTy()) return op.emitOpError("expected LLVM IR element type for operand #0 to " diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index e27650b3297dd..a89287b764e5d 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -440,7 +440,8 @@ LogicalResult LLVMStructType::setBody(ArrayRef types, bool isPacked) { bool LLVMStructType::isPacked() { return getImpl()->isPacked(); } bool LLVMStructType::isIdentified() { return getImpl()->isIdentified(); } bool LLVMStructType::isOpaque() { - return getImpl()->isOpaque() || !getImpl()->isInitialized(); + return getImpl()->isIdentified() && + (getImpl()->isOpaque() || !getImpl()->isInitialized()); } bool LLVMStructType::isInitialized() { return getImpl()->isInitialized(); } StringRef LLVMStructType::getName() { return getImpl()->getIdentifier(); } diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 1f8b1600873c9..c19795e98b686 
100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -394,7 +394,7 @@ func @nvvm_invalid_mma_7(%a0 : !llvm.vec<2 x half>, %a1 : !llvm.vec<2 x half>, // CHECK-LABEL: @atomicrmw_expected_ptr func @atomicrmw_expected_ptr(%f32 : !llvm.float) { - // expected-error@+1 {{expected LLVM IR pointer type for operand #0}} + // expected-error@+1 {{operand #0 must be LLVM pointer to floating point LLVM type or LLVM integer type}} %0 = "llvm.atomicrmw"(%f32, %f32) {bin_op=11, ordering=1} : (!llvm.float, !llvm.float) -> !llvm.float llvm.return } @@ -448,7 +448,7 @@ func @atomicrmw_expected_int(%f32_ptr : !llvm.ptr, %f32 : !llvm.float) { // CHECK-LABEL: @cmpxchg_expected_ptr func @cmpxchg_expected_ptr(%f32_ptr : !llvm.ptr, %f32 : !llvm.float) { - // expected-error@+1 {{expected LLVM IR pointer type for operand #0}} + // expected-error@+1 {{op operand #0 must be LLVM pointer to LLVM integer type or LLVM pointer type}} %0 = "llvm.cmpxchg"(%f32, %f32, %f32) {success_ordering=2,failure_ordering=2} : (!llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(float, i1)> llvm.return } From e2fc6a31d347dc96c2dec6acb72045150f525630 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Sep 2020 08:44:57 +0100 Subject: [PATCH 172/465] [MemCpyOpt] Preserve MemorySSA. This patch updates MemCpyOpt to preserve MemorySSA. It uses the MemoryDef at the insertion point of the builder and inserts the new def after that def. In some cases, we just modify a memory instruction. In that case, get the defining access, then remove the memory access and add a new one. If the defining access is in a different block, insert a new def at the beginning of the current block, otherwise after the defining access. 
Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D86651 --- .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 5 +- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 157 ++++++++++++++++-- .../Transforms/MemCpyOpt/preserve-memssa.ll | 139 ++++++++++++++++ 3 files changed, 285 insertions(+), 16 deletions(-) create mode 100644 llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 3e278b31910f4..89a2e24af288b 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -30,6 +30,8 @@ class Instruction; class MemCpyInst; class MemMoveInst; class MemoryDependenceResults; +class MemorySSA; +class MemorySSAUpdater; class MemSetInst; class StoreInst; class TargetLibraryInfo; @@ -41,6 +43,7 @@ class MemCpyOptPass : public PassInfoMixin { AliasAnalysis *AA = nullptr; AssumptionCache *AC = nullptr; DominatorTree *DT = nullptr; + MemorySSAUpdater *MSSAU = nullptr; public: MemCpyOptPass() = default; @@ -50,7 +53,7 @@ class MemCpyOptPass : public PassInfoMixin { // Glue for the old PM. 
bool runImpl(Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, - AssumptionCache *AC_, DominatorTree *DT_); + AssumptionCache *AC_, DominatorTree *DT_, MemorySSA *MSSA_); private: // Helper functions diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index d2a8f5e08d684..aff08a2c366c9 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -23,6 +23,8 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -278,6 +280,7 @@ class MemCpyOptLegacyPass : public FunctionPass { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); } }; @@ -315,7 +318,27 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, MemsetRanges Ranges(DL); BasicBlock::iterator BI(StartInst); + + // Keeps track of the last memory use or def before the insertion point for + // the new memset. The new MemoryDef for the inserted memsets will be inserted + // after MemInsertPoint. It points to either LastMemDef or to the last user + // before the insertion point of the memset, if there are any such users. + MemoryUseOrDef *MemInsertPoint = nullptr; + // Keeps track of the last MemoryDef between StartInst and the insertion point + // for the new memset. This will become the defining access of the inserted + // memsets. 
+ MemoryDef *LastMemDef = nullptr; for (++BI; !BI->isTerminator(); ++BI) { + if (MSSAU) { + auto *CurrentAcc = cast_or_null( + MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); + if (CurrentAcc) { + MemInsertPoint = CurrentAcc; + if (auto *CurrentDef = dyn_cast(CurrentAcc)) + LastMemDef = CurrentDef; + } + } + if (!isa(BI) && !isa(BI)) { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: @@ -394,15 +417,27 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, : Range.TheStores) dbgs() << *SI << '\n'; dbgs() << "With: " << *AMemSet << '\n'); - if (!Range.TheStores.empty()) AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); + if (MSSAU) { + assert(LastMemDef && MemInsertPoint && + "Both LastMemDef and MemInsertPoint need to be set"); + auto *NewDef = cast( + MSSAU->createMemoryAccessAfter(AMemSet, LastMemDef, MemInsertPoint)); + MSSAU->insertDef(NewDef, /*RenameUses=*/true); + LastMemDef = NewDef; + MemInsertPoint = NewDef; + } + // Zap all the stores. 
for (Instruction *SI : Range.TheStores) { + if (MSSAU) + MSSAU->removeMemoryAccess(SI); MD->removeInstruction(SI); SI->eraseFromParent(); } + ++NumMemSetInfer; } @@ -573,6 +608,17 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); + if (MSSAU) { + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(P))); + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(P)); + auto *NewAccess = + MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + MSSAU->removeMemoryAccess(SI); + MSSAU->removeMemoryAccess(LI); + } + MD->removeInstruction(SI); SI->eraseFromParent(); MD->removeInstruction(LI); @@ -621,6 +667,11 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { DL.getTypeStoreSize(SI->getOperand(0)->getType()), commonAlignment(SI->getAlign(), LI->getAlign()), C); if (changed) { + if (MSSAU) { + MSSAU->removeMemoryAccess(SI); + MSSAU->removeMemoryAccess(LI); + } + MD->removeInstruction(SI); SI->eraseFromParent(); MD->removeInstruction(LI); @@ -658,6 +709,15 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); + if (MSSAU) { + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(SI))); + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(SI)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + MSSAU->removeMemoryAccess(SI); + } + MD->removeInstruction(SI); SI->eraseFromParent(); NumMemSetInfer++; @@ -939,14 +999,23 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // TODO: Is this worth it if we're creating a less aligned memcpy? For // example we could be moving from movaps -> movq on x86. 
IRBuilder<> Builder(M); + Instruction *NewM; if (UseMemMove) - Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), - MDep->getRawSource(), MDep->getSourceAlign(), - M->getLength(), M->isVolatile()); + NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), + MDep->getRawSource(), MDep->getSourceAlign(), + M->getLength(), M->isVolatile()); else - Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), - MDep->getRawSource(), MDep->getSourceAlign(), - M->getLength(), M->isVolatile()); + NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), + MDep->getRawSource(), MDep->getSourceAlign(), + M->getLength(), M->isVolatile()); + + if (MSSAU) { + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(M))); + auto *LastDef = cast(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + MSSAU->removeMemoryAccess(M); + } // Remove the instruction we're replacing. MD->removeInstruction(M); @@ -1012,11 +1081,25 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize); Value *MemsetLen = Builder.CreateSelect( Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff); - Builder.CreateMemSet( + Instruction *NewMemSet = Builder.CreateMemSet( Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest, SrcSize), MemSet->getOperand(1), MemsetLen, MaybeAlign(Align)); + if (MSSAU) { + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && + "MemCpy must be a MemoryDef"); + // The new memset is inserted after the memcpy, but it is known that its + // defining access is the memset about to be removed which immediately + // precedes the memcpy. 
+ auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessBefore( + NewMemSet, LastDef->getDefiningAccess(), LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + MSSAU->removeMemoryAccess(MemSet); + } + MD->removeInstruction(MemSet); MemSet->eraseFromParent(); return true; @@ -1081,8 +1164,16 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, } IRBuilder<> Builder(MemCpy); - Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), CopySize, - MaybeAlign(MemCpy->getDestAlignment())); + Instruction *NewM = + Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), + CopySize, MaybeAlign(MemCpy->getDestAlignment())); + if (MSSAU) { + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + } + return true; } @@ -1098,6 +1189,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { // If the source and destination of the memcpy are the same, then zap it. 
if (M->getSource() == M->getDest()) { ++BBI; + if (MSSAU) + MSSAU->removeMemoryAccess(M); + MD->removeInstruction(M); M->eraseFromParent(); return true; @@ -1109,8 +1203,18 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { if (Value *ByteVal = isBytewiseValue(GV->getInitializer(), M->getModule()->getDataLayout())) { IRBuilder<> Builder(M); - Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), - MaybeAlign(M->getDestAlignment()), false); + Instruction *NewM = + Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), + MaybeAlign(M->getDestAlignment()), false); + if (MSSAU) { + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = + MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); + MSSAU->removeMemoryAccess(M); + } + MD->removeInstruction(M); M->eraseFromParent(); ++NumCpyToSet; @@ -1145,6 +1249,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { M->getSourceAlign().valueOrOne()); if (performCallSlotOptzn(M, M->getDest(), M->getSource(), CopySize->getZExtValue(), Alignment, C)) { + if (MSSAU) + MSSAU->removeMemoryAccess(M); + MD->removeInstruction(M); M->eraseFromParent(); return true; @@ -1161,6 +1268,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { return processMemCpyMemCpyDependence(M, MDep); } else if (SrcDepInfo.isDef()) { if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) { + if (MSSAU) + MSSAU->removeMemoryAccess(M); + MD->removeInstruction(M); M->eraseFromParent(); ++NumMemCpyInstr; @@ -1171,6 +1281,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { if (SrcDepInfo.isClobber()) if (MemSetInst *MDep = dyn_cast(SrcDepInfo.getInst())) if (performMemCpyToMemSetOptzn(M, MDep)) { + if (MSSAU) + MSSAU->removeMemoryAccess(M); MD->removeInstruction(M); M->eraseFromParent(); ++NumCpyToSet; @@ -1201,6 +1313,9 @@ bool 
MemCpyOptPass::processMemMove(MemMoveInst *M) { M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys)); + // For MemorySSA nothing really changes (except that memcpy may imply stricter + // aliasing guarantees). + // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. MD->removeInstruction(M); @@ -1338,8 +1453,10 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { auto *AA = &AM.getResult(F); auto *AC = &AM.getResult(F); auto *DT = &AM.getResult(F); + auto *MSSA = AM.getCachedResult(F); - bool MadeChange = runImpl(F, &MD, &TLI, AA, AC, DT); + bool MadeChange = + runImpl(F, &MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr); if (!MadeChange) return PreservedAnalyses::all(); @@ -1347,18 +1464,23 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { PA.preserveSet(); PA.preserve(); PA.preserve(); + if (MSSA) + PA.preserve(); return PA; } bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, - AssumptionCache *AC_, DominatorTree *DT_) { + AssumptionCache *AC_, DominatorTree *DT_, + MemorySSA *MSSA_) { bool MadeChange = false; MD = MD_; TLI = TLI_; AA = AA_; AC = AC_; DT = DT_; + MemorySSAUpdater MSSAU_(MSSA_); + MSSAU = MSSA_ ? &MSSAU_ : nullptr; // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. 
@@ -1371,6 +1493,9 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, MadeChange = true; } + if (MSSA_ && VerifyMemorySSA) + MSSA_->verifyMemorySSA(); + MD = nullptr; return MadeChange; } @@ -1385,6 +1510,8 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) { auto *AA = &getAnalysis().getAAResults(); auto *AC = &getAnalysis().getAssumptionCache(F); auto *DT = &getAnalysis().getDomTree(); + auto *MSSAWP = getAnalysisIfAvailable(); - return Impl.runImpl(F, MD, TLI, AA, AC, DT); + return Impl.runImpl(F, MD, TLI, AA, AC, DT, + MSSAWP ? &MSSAWP->getMSSA() : nullptr); } diff --git a/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll b/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll new file mode 100644 index 0000000000000..4e449ed5ff4f9 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll @@ -0,0 +1,139 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -aa-pipeline=basic-aa -passes='require,memcpyopt' -verify-memoryssa -S %s | FileCheck %s + +; REQUIRES: asserts + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.15.0" + +%t = type <{ i8*, [4 x i8], i8*, i8*, i32, [8192 x i8] }> + + +define i32 @test1(%t* %ptr) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: invoke.cont6: +; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds [[T:%.*]], %t* [[PTR:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[P_1_C:%.*]] = bitcast i8** [[P_1]] to i8* +; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds [[T]], %t* [[PTR]], i64 0, i32 4 +; CHECK-NEXT: [[P_3:%.*]] = getelementptr inbounds [[T]], %t* [[PTR]], i64 0, i32 5, i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8** [[P_1]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 20, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P_2]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 0, i64 8195, i1 false) +; CHECK-NEXT: ret 
i32 0 +; +invoke.cont6: + %p.1 = getelementptr inbounds %t, %t* %ptr, i64 0, i32 0 + %p.1.c = bitcast i8** %p.1 to i8* + call void @llvm.memset.p0i8.i64(i8* %p.1.c, i8 0, i64 20, i1 false) + store i8* null, i8** %p.1, align 8 + %p.2 = getelementptr inbounds %t, %t* %ptr, i64 0, i32 4 + store i32 0, i32* %p.2, align 8 + %p.3 = getelementptr inbounds %t, %t* %ptr, i64 0, i32 5, i64 0 + call void @llvm.memset.p0i8.i64(i8* %p.3, i8 0, i64 8191, i1 false) + ret i32 0 +} + +declare i8* @get_ptr() + +define void @test2(i8 *%in) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL_I1_I:%.*]] = tail call i8* @get_ptr() +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, i8* [[CALL_I1_I]], i64 10 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP0]], i8 0, i64 0, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CALL_I1_I]], i8* [[IN:%.*]], i64 10, i1 false) +; CHECK-NEXT: ret void +; +entry: + %call.i1.i = tail call i8* @get_ptr() + tail call void @llvm.memset.p0i8.i64(i8* %call.i1.i, i8 0, i64 10, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call.i1.i, i8* %in, i64 10, i1 false) + ret void +} + +declare i8* @malloc(i64) + +define i32 @test3(i8* noalias %in) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call i8* @malloc(i64 20) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CALL_I_I_I]], i8* [[IN:%.*]], i64 20, i1 false) +; CHECK-NEXT: ret i32 10 +; + %call.i.i.i = tail call i8* @malloc(i64 20) + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call.i.i.i, i8* %in, i64 20, i1 false) + ret i32 10 +} + +define void @test4(i32 %n, i8* noalias %ptr.0, i8* noalias %ptr.1, i32* %ptr.2) unnamed_addr { +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[ELEM_I:%.*]] = getelementptr i8, i8* [[PTR_0:%.*]], i64 8 +; CHECK-NEXT: store i32 [[N:%.*]], i32* [[PTR_2:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[ELEM_I]], i64 10 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* 
align 1 [[TMP1]], i8 0, i64 0, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ELEM_I]], i8* [[PTR_1:%.*]], i64 10, i1 false) +; CHECK-NEXT: ret void +; + %elem.i = getelementptr i8, i8* %ptr.0, i64 8 + call void @llvm.memset.p0i8.i64(i8* %elem.i, i8 0, i64 10, i1 false) + store i32 %n, i32* %ptr.2, align 8 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %elem.i, i8* %ptr.1, i64 10, i1 false) + ret void +} + +declare void @decompose(%t* nocapture) + +define void @test5(i32* %ptr) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[EARLY_DATA:%.*]] = alloca [128 x i8], align 8 +; CHECK-NEXT: [[TMP:%.*]] = alloca [[T:%.*]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast [128 x i8]* [[EARLY_DATA]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %t* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 32, i8* [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[PTR:%.*]], align 8 +; CHECK-NEXT: call fastcc void @decompose(%t* [[TMP]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i1 false) +; CHECK-NEXT: ret void +; +entry: + %early_data = alloca [128 x i8], align 8 + %tmp = alloca %t, align 8 + %0 = bitcast [128 x i8]* %early_data to i8* + %1 = bitcast %t* %tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 32, i8* %0) + %2 = load i32, i32* %ptr, align 8 + call fastcc void @decompose(%t* %tmp) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i1 false) + ret void +} + +define i8 @test6(i8* %ptr, i8* noalias %ptr.1) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 24, i8* [[PTR:%.*]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[PTR]], align 8 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[PTR]], i8* [[PTR_1:%.*]], i64 24, i1 false) +; CHECK-NEXT: ret i8 [[TMP0]] +; +entry: + call void @llvm.lifetime.start.p0i8(i64 24, i8* %ptr) + %0 = load i8, i8* %ptr, align 8 + call void @llvm.memmove.p0i8.p0i8.i64(i8* 
%ptr, i8* %ptr.1, i64 24, i1 false) + ret i8 %0 +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 + +; Function Attrs: argmemonly nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) #0 + +attributes #0 = { argmemonly nounwind willreturn } +attributes #1 = { argmemonly nounwind willreturn writeonly } From 48ac5b4833b60f00f0923db11ea31e7316bc78c6 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 4 Sep 2020 01:17:18 -0700 Subject: [PATCH 173/465] [NFC][Asan] Reformat some allocator code --- compiler-rt/lib/asan/asan_allocator.cpp | 36 ++++++++++--------- .../sanitizer_allocator_combined.h | 1 - .../sanitizer_allocator_primary64.h | 9 ++--- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index cb9f49f73a32c..52033821ffdac 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -73,13 +73,13 @@ static const uptr kAllocBegMagic = 0xCC6E96B9; struct ChunkHeader { // 1-st 8 bytes. atomic_uint8_t chunk_state; - u32 alloc_tid : 24; + u32 alloc_tid : 24; - u32 free_tid : 24; - u32 from_memalign : 1; - u32 alloc_type : 2; - u32 rz_log : 3; - u32 lsan_tag : 2; + u32 free_tid : 24; + u32 from_memalign : 1; + u32 alloc_type : 2; + u32 rz_log : 3; + u32 lsan_tag : 2; // 2-nd 8 bytes // This field is used for small sizes. 
For large sizes it is equal to // SizeClassMap::kMaxSize and the actual size is stored in the @@ -109,7 +109,7 @@ enum { // The chunk is allocated and not yet freed. CHUNK_ALLOCATED = 2, // The chunk was freed and put into quarantine zone. - CHUNK_QUARANTINE = 3 + CHUNK_QUARANTINE = 3, }; struct AsanChunk: ChunkBase { @@ -118,7 +118,7 @@ struct AsanChunk: ChunkBase { if (user_requested_size != SizeClassMap::kMaxSize) return user_requested_size; return *reinterpret_cast( - get_allocator().GetMetaData(AllocBeg(locked_version))); + get_allocator().GetMetaData(AllocBeg(locked_version))); } void *AllocBeg(bool locked_version = false) { if (from_memalign) { @@ -519,7 +519,7 @@ struct Allocator { m->free_tid = kInvalidTid; m->from_memalign = user_beg != beg_plus_redzone; if (alloc_beg != chunk_beg) { - CHECK_LE(alloc_beg+ 2 * sizeof(uptr), chunk_beg); + CHECK_LE(alloc_beg + 2 * sizeof(uptr), chunk_beg); reinterpret_cast(alloc_beg)[0] = kAllocBegMagic; reinterpret_cast(alloc_beg)[1] = chunk_beg; } @@ -735,7 +735,8 @@ struct Allocator { // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg). 
AsanChunk *GetAsanChunk(void *alloc_beg) { - if (!alloc_beg) return nullptr; + if (!alloc_beg) + return nullptr; if (!allocator.FromPrimary(alloc_beg)) { uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); AsanChunk *m = reinterpret_cast(meta[1]); @@ -751,11 +752,13 @@ struct Allocator { } AsanChunk *GetAsanChunkDebug(void *alloc_beg) { - if (!alloc_beg) return nullptr; + if (!alloc_beg) + return nullptr; if (!allocator.FromPrimary(alloc_beg)) { uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); AsanChunk *m = reinterpret_cast(meta[1]); - Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta, m); + Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta, + m); return m; } uptr *alloc_magic = reinterpret_cast(alloc_beg); @@ -768,7 +771,6 @@ struct Allocator { return reinterpret_cast(alloc_beg); } - AsanChunk *GetAsanChunkByAddr(uptr p) { void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast(p)); return GetAsanChunk(alloc_beg); @@ -784,7 +786,8 @@ struct Allocator { AsanChunk *GetAsanChunkByAddrFastLockedDebug(uptr p) { void *alloc_beg = allocator.GetBlockBeginFastLockedDebug(reinterpret_cast(p)); - Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p, alloc_beg); + Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p, + alloc_beg); return GetAsanChunkDebug(alloc_beg); } @@ -1055,7 +1058,7 @@ void AsanSoftRssLimitExceededCallback(bool limit_exceeded) { instance.SetRssLimitExceeded(limit_exceeded); } -} // namespace __asan +} // namespace __asan // --- Implementation of LSan-specific functions --- {{{1 namespace __lsan { @@ -1092,7 +1095,8 @@ extern "C" SANITIZER_WEAK_ATTRIBUTE const char *__lsan_current_stage; void GetUserBeginDebug(uptr chunk) { Printf("GetUserBeginDebug1 chunk %p\n", chunk); - __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); + __asan::AsanChunk *m = + __asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); 
Printf("GetUserBeginDebug2 m %p\n", m); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h index 6d73784d77d09..0cf483da1e5c8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h @@ -148,7 +148,6 @@ class CombinedAllocator { return secondary_.GetBlockBeginFastLocked(p); } - uptr GetActuallyAllocatedSize(void *p) { if (primary_.PointerIsMine(p)) return primary_.GetActuallyAllocatedSize(p); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h index 7af469c56fd6a..a6126fc6265eb 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h @@ -203,7 +203,8 @@ class SizeClassAllocator64 { uptr class_id = GetSizeClass(p); uptr size = ClassIdToSize(class_id); Printf("GetBlockBeginDebug1 p %p class_id %p size %p\n", p, class_id, size); - if (!size) return nullptr; + if (!size) + return nullptr; uptr chunk_idx = GetChunkIdx((uptr)p, size); uptr reg_beg = GetRegionBegin(p); uptr beg = chunk_idx * size; @@ -212,16 +213,16 @@ class SizeClassAllocator64 { "GetBlockBeginDebug2 chunk_idx %p reg_beg %p beg %p next_beg %p " "kNumClasses %p\n", chunk_idx, reg_beg, beg, next_beg, kNumClasses); - if (class_id >= kNumClasses) return nullptr; + if (class_id >= kNumClasses) + return nullptr; const RegionInfo *region = AddressSpaceView::Load(GetRegionInfo(class_id)); Printf("GetBlockBeginDebug3 region %p region->mapped_user %p\n", region, region->mapped_user); if (region->mapped_user >= next_beg) - return reinterpret_cast(reg_beg + beg); + return reinterpret_cast(reg_beg + beg); return nullptr; } - uptr GetActuallyAllocatedSize(void *p) { CHECK(PointerIsMine(p)); return ClassIdToSize(GetSizeClass(p)); From 
ab86e64a96624e99df2f2e6183aef5fe7b2a081a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Sep 2020 09:39:40 +0100 Subject: [PATCH 174/465] [DSE] Remove some dead code from DSE tests. Some tests depend on DSE removing dead instructions unrelated to any memory optimization. That's not really DSE's job, remove it. --- llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll | 2 -- .../Transforms/DeadStoreElimination/merge-stores-big-endian.ll | 1 - llvm/test/Transforms/DeadStoreElimination/merge-stores.ll | 1 - 3 files changed, 4 deletions(-) diff --git a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll index 68943d383ba6f..2902712376930 100644 --- a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll @@ -38,7 +38,6 @@ define void @test3() { ; CHECK-NEXT: ret void ; %A = alloca i8 - %B = alloca i8 store i8 0, i8* %A ;; Written to by memset @@ -87,7 +86,6 @@ define void @test6() { ; CHECK-NEXT: ret void ; %A = alloca i16, i16 1024, align 2 - %B = alloca i16, i16 1024, align 2 store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memset diff --git a/llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll b/llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll index 8d44855f2f97b..77784ac0c4047 100644 --- a/llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll +++ b/llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll @@ -40,7 +40,6 @@ entry: %wptr = bitcast i64* %ptr to i16* %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 ;; We should be able to merge these two stores with the i64 one above diff --git a/llvm/test/Transforms/DeadStoreElimination/merge-stores.ll b/llvm/test/Transforms/DeadStoreElimination/merge-stores.ll index 
c7f86ab29e3e0..095ad9d8530c8 100644 --- a/llvm/test/Transforms/DeadStoreElimination/merge-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/merge-stores.ll @@ -39,7 +39,6 @@ entry: %wptr = bitcast i64* %ptr to i16* %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 - %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 ;; We should be able to merge these two stores with the i64 one above From 73a3d350a47fb8211c0c912134fbbcd59abc5d56 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Fri, 21 Aug 2020 10:59:29 +0100 Subject: [PATCH 175/465] [SVE][CodeGen] Fix up warnings in sve-split-insert/extract tests I have fixed up some more ElementCount/TypeSize related warnings in the following tests: CodeGen/AArch64/sve-split-extract-elt.ll CodeGen/AArch64/sve-split-insert-elt.ll In SelectionDAG::CreateStackTemporary we were relying upon the implicit cast from TypeSize -> uint64_t when calling MachineFrameInfo::CreateStackObject. I've fixed this by passing in the known minimum size instead, which I believe is fine because the associated stack id indicates whether this is a scalable object or not. I've also fixed up a case in TargetLowering::SimplifyDemandedBits when extracting a vector element from a scalable vector. The result is a scalar, hence it wasn't caught at the start of the function. If the vector is scalable we just bail out for now. 
Differential Revision: https://reviews.llvm.org/D86431 --- llvm/include/llvm/Support/TypeSize.h | 9 +++++++++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +++- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 6 +++++- llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll | 6 +++++- llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll | 6 +++++- 5 files changed, 27 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h index b6392e61db4b3..a0b8fbde25a96 100644 --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -116,6 +116,15 @@ class ElementCount { unsigned getKnownMinValue() const { return Min; } + // Return the minimum value with the assumption that the count is exact. + // Use in places where a scalable count doesn't make sense (e.g. non-vector + // types, or vectors in backends which don't support scalable vectors). + unsigned getFixedValue() const { + assert(!Scalable && + "Request for a fixed element count on a scalable object"); + return Min; + } + bool isScalable() const { return Scalable; } }; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ff6c642e3d4f3..73e042c475402 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2030,7 +2030,9 @@ SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) { int StackID = 0; if (Bytes.isScalable()) StackID = TFI->getStackIDForScalableVectors(); - int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, + // The stack id gives an indication of whether the object is scalable or + // not, so it's safe to pass in the minimum size here. 
+ int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinSize(), Alignment, false, nullptr, StackID); return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a7816b8616e65..ae98edb74466d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2022,10 +2022,14 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::EXTRACT_VECTOR_ELT: { SDValue Src = Op.getOperand(0); SDValue Idx = Op.getOperand(1); - unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount(); unsigned EltBitWidth = Src.getScalarValueSizeInBits(); + if (SrcEltCnt.isScalable()) + return false; + // Demand the bits from every vector element without a constant index. + unsigned NumSrcElts = SrcEltCnt.getFixedValue(); APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); if (auto *CIdx = dyn_cast(Idx)) if (CIdx->getAPIntValue().ult(NumSrcElts)) diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll index e0d99d57d8e6a..aa01dae055123 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -1,5 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning ; EXTRACT VECTOR ELT diff --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll index 5e6dedf4a4cc1..cb998730d2bca 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll @@ -1,5 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning ; INSERT VECTOR ELT From 4770f80a7c6f5609d3f79c3a832759c1915a73f4 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 4 Sep 2020 02:03:28 -0700 Subject: [PATCH 176/465] [NFC][Asan] Fix typo in comment --- compiler-rt/lib/asan/asan_allocator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 52033821ffdac..f5c273e7fc25b 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -745,7 +745,7 @@ struct Allocator { uptr *alloc_magic = reinterpret_cast(alloc_beg); if (alloc_magic[0] == kAllocBegMagic) return reinterpret_cast(alloc_magic[1]); - // FIXME: This is either valid small chunk with tiny redzine or invalid + // FIXME: This is either valid small chunk with tiny redzone or invalid // chunk which is beeing allocated/deallocated. The latter case should // return nullptr like secondary allocator does. return reinterpret_cast(alloc_beg); From 0c2a4d3c1c95c8217e9e00d5a20feed0d5c37ac5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 4 Sep 2020 11:10:03 +0200 Subject: [PATCH 177/465] [mlir][VectorOps] Simplify code. NFCI. 
--- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index dfa204d17389a..a43bec855ff0a 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -134,18 +134,14 @@ static Value buildVectorComparison(ConversionPatternRewriter &rewriter, Value indices; Type idxType; if (enableIndexOptimizations) { - SmallVector values(dim); - for (int64_t d = 0; d < dim; d++) - values[d] = d; - indices = - rewriter.create(loc, rewriter.getI32VectorAttr(values)); + indices = rewriter.create( + loc, rewriter.getI32VectorAttr( + llvm::to_vector<4>(llvm::seq(0, dim)))); idxType = rewriter.getI32Type(); } else { - SmallVector values(dim); - for (int64_t d = 0; d < dim; d++) - values[d] = d; - indices = - rewriter.create(loc, rewriter.getI64VectorAttr(values)); + indices = rewriter.create( + loc, rewriter.getI64VectorAttr( + llvm::to_vector<4>(llvm::seq(0, dim)))); idxType = rewriter.getI64Type(); } // Add in an offset if requested. @@ -451,11 +447,9 @@ class VectorGatherOpConversion : public ConvertToLLVMPattern { return failure(); // Replace with the gather intrinsic. - ValueRange v = (llvm::size(adaptor.pass_thru()) == 0) ? ValueRange({}) - : adaptor.pass_thru(); rewriter.replaceOpWithNewOp( - gather, typeConverter.convertType(vType), ptrs, adaptor.mask(), v, - rewriter.getI32IntegerAttr(align)); + gather, typeConverter.convertType(vType), ptrs, adaptor.mask(), + adaptor.pass_thru(), rewriter.getI32IntegerAttr(align)); return success(); } }; @@ -1282,7 +1276,7 @@ class VectorTransferConversion : public ConvertToLLVMPattern { // dimensions here. 
unsigned vecWidth = vecTy.getVectorNumElements(); unsigned lastIndex = llvm::size(xferOp.indices()) - 1; - Value off = *(xferOp.indices().begin() + lastIndex); + Value off = xferOp.indices()[lastIndex]; Value dim = rewriter.create(loc, xferOp.memref(), lastIndex); Value mask = buildVectorComparison(rewriter, op, enableIndexOptimizations, vecWidth, dim, &off); From f9ad112770ece2e3612ddcdd6a4157bc0b0faf31 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Fri, 4 Sep 2020 11:25:38 +0200 Subject: [PATCH 178/465] [lldb] Speed up TestValueObjectRecursion by making it a no_debug_info_test This is one of the most expensive tests and runs for nearly half a minute on my machine. Beside this test just doing a lot of work by iterating 15k times on one ValueObject (which seems to be the point), it also runs this for every debug info variant which doesn't seem relevant to just iterating ValueObject. This marks it as no_debug_info_test to only run one debug info variation and cut down the runtime to around 7 seconds on my machine. --- .../API/functionalities/recursion/TestValueObjectRecursion.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py b/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py index e949f1a1a07e9..1653fe36af7b0 100644 --- a/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py +++ b/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py @@ -6,6 +6,7 @@ import lldb +from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * import lldbsuite.test.lldbutil as lldbutil @@ -20,6 +21,7 @@ def setUp(self): # Find the line number to break at. 
self.line = line_number('main.cpp', '// Set break point at this line.') + @no_debug_info_test def test_with_run_command(self): """Test that deeply nested ValueObjects still work.""" self.build() From a633da5391b0e42c0185132e8b532ae9bc34489f Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Mon, 24 Aug 2020 14:02:26 +0700 Subject: [PATCH 179/465] [FPEnv] Partially implement #pragma STDC FENV_ROUND This change implements pragma STDC FENV_ROUND, which is introduced by the extension to standard (TS 18661-1). The pragma is implemented only in frontend, it sets apprpriate state of FPOptions stored in Sema. Use of these bits in constant evaluation adn/or code generator is not in the scope of this change. Parser issues warning on unsuppored pragma when it encounteres pragma STDC FENV_ROUND, however it makes syntax checks and updates Sema state as if the pragma were supported. Primary purpose of the partial implementation is to facilitate development of non-default floating poin environment. Previously a developer cannot set non-default rounding mode in sources, this mades preparing tests for say constant evaluation substantially complicated. 
Differential Revision: https://reviews.llvm.org/D86921 --- .../clang/Basic/DiagnosticParseKinds.td | 6 ++ clang/include/clang/Basic/TokenKinds.def | 5 ++ clang/include/clang/Parse/Parser.h | 7 +- clang/include/clang/Sema/Sema.h | 2 +- clang/lib/Parse/ParsePragma.cpp | 80 ++++++++++++++++++- clang/lib/Parse/ParseStmt.cpp | 9 +++ clang/lib/Parse/Parser.cpp | 3 + clang/lib/Sema/SemaAttr.cpp | 5 ++ clang/test/AST/ast-dump-fpfeatures.cpp | 67 +++++++++++++++- clang/test/Parser/pragma-fenv_round.c | 11 +++ 10 files changed, 188 insertions(+), 7 deletions(-) create mode 100644 clang/test/Parser/pragma-fenv_round.c diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 0e51fef8659ea..1c8d741ab54ff 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1136,6 +1136,12 @@ def ext_stdc_pragma_ignored : ExtWarn<"unknown pragma in STDC namespace">, def warn_stdc_fenv_access_not_supported : Warning<"pragma STDC FENV_ACCESS ON is not supported, ignoring pragma">, InGroup; +def warn_stdc_fenv_round_not_supported : + Warning<"pragma STDC FENV_ROUND is not supported">, + InGroup; +def warn_stdc_unknown_rounding_mode : Warning< + "invalid or unsupported rounding mode in '#pragma STDC FENV_ROUND' - ignored">, + InGroup; // - #pragma comment def err_pragma_comment_malformed : Error< "pragma comment requires parenthesized identifier and optional string">; diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index daaa54c3db7c2..63f1cf9896db0 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -829,6 +829,11 @@ PRAGMA_ANNOTATION(pragma_fp_contract) // handles them. PRAGMA_ANNOTATION(pragma_fenv_access) +// Annotation for #pragma STDC FENV_ROUND +// The lexer produces these so that they only take effect when the parser +// handles them. 
+PRAGMA_ANNOTATION(pragma_fenv_round) + // Annotation for #pragma float_control // The lexer produces these so that they only take effect when the parser // handles them. diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 37ca9e8933298..af8cf47e56673 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -202,7 +202,8 @@ class Parser : public CodeCompletionHandler { std::unique_ptr UnrollAndJamHintHandler; std::unique_ptr NoUnrollAndJamHintHandler; std::unique_ptr FPHandler; - std::unique_ptr STDCFENVHandler; + std::unique_ptr STDCFenvAccessHandler; + std::unique_ptr STDCFenvRoundHandler; std::unique_ptr STDCCXLIMITHandler; std::unique_ptr STDCUnknownHandler; std::unique_ptr AttributePragmaHandler; @@ -745,6 +746,10 @@ class Parser : public CodeCompletionHandler { /// #pragma STDC FENV_ACCESS... void HandlePragmaFEnvAccess(); + /// Handle the annotation token produced for + /// #pragma STDC FENV_ROUND... + void HandlePragmaFEnvRound(); + /// Handle the annotation token produced for /// #pragma float_control void HandlePragmaFloatControl(); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ec449d6dd6be4..53d0285d37027 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -9749,7 +9749,7 @@ class Sema final { /// \#pragma STDC FENV_ACCESS void ActOnPragmaFEnvAccess(SourceLocation Loc, bool IsEnabled); - /// Called to set rounding mode for floating point operations. + /// Called to set constant rounding mode for floating point operations. void setRoundingMode(SourceLocation Loc, llvm::RoundingMode); /// Called to set exception behavior for floating point operations. 
diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index 6402b31d00b29..572fc7115b879 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -135,6 +135,14 @@ struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler { } }; +/// Handler for "\#pragma STDC FENV_ROUND ...". +struct PragmaSTDC_FENV_ROUNDHandler : public PragmaHandler { + PragmaSTDC_FENV_ROUNDHandler() : PragmaHandler("FENV_ROUND") {} + + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, + Token &Tok) override; +}; + /// PragmaSTDC_UnknownHandler - "\#pragma STDC ...". struct PragmaSTDC_UnknownHandler : public PragmaHandler { PragmaSTDC_UnknownHandler() = default; @@ -312,8 +320,11 @@ void Parser::initializePragmaHandlers() { FPContractHandler = std::make_unique(); PP.AddPragmaHandler("STDC", FPContractHandler.get()); - STDCFENVHandler = std::make_unique(); - PP.AddPragmaHandler("STDC", STDCFENVHandler.get()); + STDCFenvAccessHandler = std::make_unique(); + PP.AddPragmaHandler("STDC", STDCFenvAccessHandler.get()); + + STDCFenvRoundHandler = std::make_unique(); + PP.AddPragmaHandler("STDC", STDCFenvRoundHandler.get()); STDCCXLIMITHandler = std::make_unique(); PP.AddPragmaHandler("STDC", STDCCXLIMITHandler.get()); @@ -485,8 +496,11 @@ void Parser::resetPragmaHandlers() { PP.RemovePragmaHandler("STDC", FPContractHandler.get()); FPContractHandler.reset(); - PP.RemovePragmaHandler("STDC", STDCFENVHandler.get()); - STDCFENVHandler.reset(); + PP.RemovePragmaHandler("STDC", STDCFenvAccessHandler.get()); + STDCFenvAccessHandler.reset(); + + PP.RemovePragmaHandler("STDC", STDCFenvRoundHandler.get()); + STDCFenvRoundHandler.reset(); PP.RemovePragmaHandler("STDC", STDCCXLIMITHandler.get()); STDCCXLIMITHandler.reset(); @@ -697,6 +711,14 @@ void Parser::HandlePragmaFEnvAccess() { Actions.ActOnPragmaFEnvAccess(PragmaLoc, IsEnabled); } +void Parser::HandlePragmaFEnvRound() { + assert(Tok.is(tok::annot_pragma_fenv_round)); + auto RM = 
static_cast( + reinterpret_cast(Tok.getAnnotationValue())); + + SourceLocation PragmaLoc = ConsumeAnnotationToken(); + Actions.setRoundingMode(PragmaLoc, RM); +} StmtResult Parser::HandlePragmaCaptured() { @@ -2929,6 +2951,56 @@ void PragmaFPHandler::HandlePragma(Preprocessor &PP, /*DisableMacroExpansion=*/false, /*IsReinject=*/false); } +void PragmaSTDC_FENV_ROUNDHandler::HandlePragma(Preprocessor &PP, + PragmaIntroducer Introducer, + Token &Tok) { + Token PragmaName = Tok; + SmallVector TokenList; + + PP.Lex(Tok); + if (Tok.isNot(tok::identifier)) { + PP.Diag(Tok.getLocation(), diag::warn_pragma_expected_identifier) + << PragmaName.getIdentifierInfo()->getName(); + return; + } + IdentifierInfo *II = Tok.getIdentifierInfo(); + + auto RM = + llvm::StringSwitch(II->getName()) + .Case("FE_TOWARDZERO", llvm::RoundingMode::TowardZero) + .Case("FE_TONEAREST", llvm::RoundingMode::NearestTiesToEven) + .Case("FE_UPWARD", llvm::RoundingMode::TowardPositive) + .Case("FE_DOWNWARD", llvm::RoundingMode::TowardNegative) + .Case("FE_TONEARESTFROMZERO", llvm::RoundingMode::NearestTiesToAway) + .Case("FE_DYNAMIC", llvm::RoundingMode::Dynamic) + .Default(llvm::RoundingMode::Invalid); + if (RM == llvm::RoundingMode::Invalid) { + PP.Diag(Tok.getLocation(), diag::warn_stdc_unknown_rounding_mode); + return; + } + PP.Lex(Tok); + + if (Tok.isNot(tok::eod)) { + PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol) + << "STDC FENV_ROUND"; + return; + } + + // Until the pragma is fully implemented, issue a warning. 
+ PP.Diag(Tok.getLocation(), diag::warn_stdc_fenv_round_not_supported); + + MutableArrayRef Toks(PP.getPreprocessorAllocator().Allocate(1), + 1); + Toks[0].startToken(); + Toks[0].setKind(tok::annot_pragma_fenv_round); + Toks[0].setLocation(Tok.getLocation()); + Toks[0].setAnnotationEndLoc(Tok.getLocation()); + Toks[0].setAnnotationValue( + reinterpret_cast(static_cast(RM))); + PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true, + /*IsReinject=*/false); +} + void Parser::HandlePragmaFP() { assert(Tok.is(tok::annot_pragma_fp)); auto *AnnotValue = diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index d017842e7754f..ee35b24b3c816 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -369,6 +369,12 @@ StmtResult Parser::ParseStatementOrDeclarationAfterAttributes( HandlePragmaFEnvAccess(); return StmtEmpty(); + case tok::annot_pragma_fenv_round: + ProhibitAttributes(Attrs); + Diag(Tok, diag::err_pragma_file_or_compound_scope) << "STDC FENV_ROUND"; + ConsumeAnnotationToken(); + return StmtError(); + case tok::annot_pragma_float_control: ProhibitAttributes(Attrs); Diag(Tok, diag::err_pragma_file_or_compound_scope) << "float_control"; @@ -943,6 +949,9 @@ void Parser::ParseCompoundStatementLeadingPragmas() { case tok::annot_pragma_fenv_access: HandlePragmaFEnvAccess(); break; + case tok::annot_pragma_fenv_round: + HandlePragmaFEnvRound(); + break; case tok::annot_pragma_float_control: HandlePragmaFloatControl(); break; diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index c72ffde8fc263..109f24425777d 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -783,6 +783,9 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs, case tok::annot_pragma_fenv_access: HandlePragmaFEnvAccess(); return nullptr; + case tok::annot_pragma_fenv_round: + HandlePragmaFEnvRound(); + return nullptr; case tok::annot_pragma_float_control: HandlePragmaFloatControl(); return 
nullptr; diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index e34f7371506dd..bd5fc586b6af7 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -979,6 +979,11 @@ void Sema::ActOnPragmaFPReassociate(SourceLocation Loc, bool IsEnabled) { } void Sema::setRoundingMode(SourceLocation Loc, llvm::RoundingMode FPR) { + // C2x: 7.6.2p3 If the FE_DYNAMIC mode is specified and FENV_ACCESS is "off", + // the translator may assume that the default rounding mode is in effect. + if (FPR == llvm::RoundingMode::Dynamic && !CurFPFeatures.getAllowFEnvAccess()) + FPR = llvm::RoundingMode::NearestTiesToEven; + FPOptionsOverride NewFPFeatures = CurFPFeatureOverrides(); NewFPFeatures.setRoundingModeOverride(FPR); FpPragmaStack.Act(Loc, PSK_Set, StringRef(), NewFPFeatures); diff --git a/clang/test/AST/ast-dump-fpfeatures.cpp b/clang/test/AST/ast-dump-fpfeatures.cpp index 796b0a0283828..f3925aebbe752 100644 --- a/clang/test/AST/ast-dump-fpfeatures.cpp +++ b/clang/test/AST/ast-dump-fpfeatures.cpp @@ -34,4 +34,69 @@ float func_03(float x) { // CHECK-NEXT: ParmVarDecl {{.*}} x 'float' // CHECK-NEXT: CompoundStmt // CHECK-NEXT: ReturnStmt -// CHECK-NEXT: CallExpr {{.*}} FPContractMode=0 \ No newline at end of file +// CHECK-NEXT: CallExpr {{.*}} FPContractMode=0 + + + + +#pragma STDC FENV_ROUND FE_DOWNWARD + +float func_10(float x, float y) { + return x + y; +} + +// CHECK-LABEL: FunctionDecl {{.*}} func_10 'float (float, float)' +// CHECK: BinaryOperator {{.*}} 'float' '+' RoundingMode=3 + +float func_11(float x, float y) { + if (x < 0) { + #pragma STDC FENV_ROUND FE_UPWARD + return x + y; + } + return x - y; +} + +// CHECK-LABEL: FunctionDecl {{.*}} func_11 'float (float, float)' +// CHECK: BinaryOperator {{.*}} 'float' '+' RoundingMode=2 +// CHECK: BinaryOperator {{.*}} 'float' '-' RoundingMode=3 + + +#pragma STDC FENV_ROUND FE_DYNAMIC + +float func_12(float x, float y) { + return x + y; +} + +// CHECK-LABEL: FunctionDecl {{.*}} func_12 
'float (float, float)' +// CHECK: BinaryOperator {{.*}} 'float' '+' RoundingMode=1 + +#pragma STDC FENV_ROUND FE_TONEAREST + +float func_13(float x, float y) { + return x + y; +} + +// CHECK-LABEL: FunctionDecl {{.*}} func_13 'float (float, float)' +// CHECK: BinaryOperator {{.*}} 'float' '+' RoundingMode=1 + + +template +T func_14(T x, T y) { +#pragma STDC FENV_ROUND FE_TOWARDZERO + return x + y; +} + +float func_15(float x, float y) { +#pragma STDC FPENV_ROUND FE_DOWNWARD + return func_14(x, y); +} + +// CHECK-LABEL: FunctionTemplateDecl {{.*}} func_14 +// CHECK: FunctionDecl {{.*}} func_14 'T (T, T)' +// CHECK: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: BinaryOperator {{.*}} '+' RoundingMode=0 +// CHECK: FunctionDecl {{.*}} func_14 'float (float, float)' +// CHECK: CompoundStmt +// CHECK-NEXT: ReturnStmt +// CHECK-NEXT: BinaryOperator {{.*}} 'float' '+' RoundingMode=0 diff --git a/clang/test/Parser/pragma-fenv_round.c b/clang/test/Parser/pragma-fenv_round.c new file mode 100644 index 0000000000000..56abf7bf75a40 --- /dev/null +++ b/clang/test/Parser/pragma-fenv_round.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -fsyntax-only -Wignored-pragmas -verify %s + +#pragma STDC FENV_ROUND ON // expected-warning {{invalid or unsupported rounding mode}} + +float func_01(int x, float y) { + if (x) + return y + 2; + #pragma STDC FENV_ROUND FE_DOWNWARD // expected-error{{'#pragma STDC FENV_ROUND' can only appear at file scope or at the start of a compound statement}} + // expected-warning@-1{{pragma STDC FENV_ROUND is not supported}} + return x + y; +} From fe0972d3e4a65b4c5f5fa602b17ad30e463050b3 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Thu, 3 Sep 2020 19:08:54 +0200 Subject: [PATCH 180/465] [analyzer][StdLibraryFunctionsChecker] Do not match based on the restrict qualifier in C++ The "restrict" keyword is illegal in C++, however, many libc implementations use the "__restrict" compiler intrinsic in functions prototypes. 
The "__restrict" keyword qualifies a type as a restricted type even in C++. In case of any non-C99 languages, we don't want to match based on the restrict qualifier because we cannot know if the given libc implementation qualifies the parameter type or not. Differential Revision: https://reviews.llvm.org/D87097 --- .../Checkers/StdLibraryFunctionsChecker.cpp | 40 ++++++++++++++++--- .../std-c-library-functions-restrict.c | 24 +++++++++++ .../std-c-library-functions-restrict.cpp | 25 ++++++++++++ 3 files changed, 83 insertions(+), 6 deletions(-) create mode 100644 clang/test/Analysis/std-c-library-functions-restrict.c create mode 100644 clang/test/Analysis/std-c-library-functions-restrict.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index c65d58e49d785..2c20422a9cc48 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -744,21 +744,38 @@ bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, bool StdLibraryFunctionsChecker::Signature::matches( const FunctionDecl *FD) const { assert(!isInvalid()); - // Check number of arguments: + // Check the number of arguments. if (FD->param_size() != ArgTys.size()) return false; - // Check return type. - if (!isIrrelevant(RetTy)) - if (RetTy != FD->getReturnType().getCanonicalType()) + // The "restrict" keyword is illegal in C++, however, many libc + // implementations use the "__restrict" compiler intrinsic in function + // prototypes. The "__restrict" keyword qualifies a type as a restricted type + // even in C++. + // In case of any non-C99 languages, we don't want to match based on the + // restrict qualifier because we cannot know if the given libc implementation + // qualifies the parameter type or not. 
+ auto RemoveRestrict = [&FD](QualType T) { + if (!FD->getASTContext().getLangOpts().C99) + T.removeLocalRestrict(); + return T; + }; + + // Check the return type. + if (!isIrrelevant(RetTy)) { + QualType FDRetTy = RemoveRestrict(FD->getReturnType().getCanonicalType()); + if (RetTy != FDRetTy) return false; + } - // Check argument types. + // Check the argument types. for (size_t I = 0, E = ArgTys.size(); I != E; ++I) { QualType ArgTy = ArgTys[I]; if (isIrrelevant(ArgTy)) continue; - if (ArgTy != FD->getParamDecl(I)->getType().getCanonicalType()) + QualType FDArgTy = + RemoveRestrict(FD->getParamDecl(I)->getType().getCanonicalType()); + if (ArgTy != FDArgTy) return false; } @@ -989,6 +1006,12 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( for (const Summary &S : Summaries) operator()(Name, S); } + // Add the same summary for different names with the Signature explicitly + // given. + void operator()(std::vector Names, Signature Sign, Summary Sum) { + for (StringRef Name : Names) + operator()(Name, Sign, Sum); + } } addToFunctionSummaryMap(ACtx, FunctionSummaryMap, DisplayLoadedSummaries); // Below are helpers functions to create the summaries. 
@@ -2048,6 +2071,11 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( EvalCallAsPure) .ArgConstraint(BufferSize(/*Buffer=*/ArgNo(0), /*BufSize=*/ArgNo(1), /*BufSizeMultiplier=*/ArgNo(2)))); + addToFunctionSummaryMap( + {"__test_restrict_param_0", "__test_restrict_param_1", + "__test_restrict_param_2"}, + Signature(ArgTypes{VoidPtrRestrictTy}, RetType{VoidTy}), + Summary(EvalCallAsPure)); } } diff --git a/clang/test/Analysis/std-c-library-functions-restrict.c b/clang/test/Analysis/std-c-library-functions-restrict.c new file mode 100644 index 0000000000000..7cf5f2bc630a3 --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-restrict.c @@ -0,0 +1,24 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s + +// The signatures for these functions are the same and they specify their +// parameter with the restrict qualifier. In C, the signature should match only +// if the restrict qualifier is there on the parameter. Thus, the summary +// should be loaded for the last two declarations only. +void __test_restrict_param_0(void *p); +void __test_restrict_param_1(void *__restrict p); +void __test_restrict_param_2(void *restrict p); + +// CHECK-NOT: Loaded summary for: void __test_restrict_param_0 +// CHECK: Loaded summary for: void __test_restrict_param_1(void *restrict p) +// CHECK: Loaded summary for: void __test_restrict_param_2(void *restrict p) + +// Must have at least one call expression to initialize the summary map. 
+int bar(void); +void foo() { + bar(); +} diff --git a/clang/test/Analysis/std-c-library-functions-restrict.cpp b/clang/test/Analysis/std-c-library-functions-restrict.cpp new file mode 100644 index 0000000000000..d1cd090f5ef85 --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-restrict.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-checker=debug.StdCLibraryFunctionsTester \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -triple i686-unknown-linux 2>&1 | FileCheck %s + +// The signatures for these functions are the same and they specify their +// parameter with the restrict qualifier. In C++, however, we are more +// indulgent and we do not match based on this qualifier. Thus, the given +// signature should match for both of the declarations below, i.e the summary +// should be loaded for both of them. +void __test_restrict_param_0(void *p); +void __test_restrict_param_1(void *__restrict p); +// The below declaration is illegal, "restrict" is not a keyword in C++. +// void __test_restrict_param_2(void *restrict p); + +// CHECK: Loaded summary for: void __test_restrict_param_0(void *p) +// CHECK: Loaded summary for: void __test_restrict_param_1(void *__restrict p) + +// Must have at least one call expression to initialize the summary map. +int bar(void); +void foo() { + bar(); +} From b7c181098675eb75bc79a9b38891ee88e56a9103 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 3 Sep 2020 14:19:14 +0300 Subject: [PATCH 181/465] [llvm-readobj/elf] - Refine signature of print*Reloc methods. This makes the interface cleaner and slightly improves messages reported. 
Differential revision: https://reviews.llvm.org/D87086 --- .../llvm-readobj/ELF/relocation-errors.test | 20 ++--- llvm/tools/llvm-readobj/ELFDumper.cpp | 85 ++++++++++--------- 2 files changed, 54 insertions(+), 51 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/relocation-errors.test b/llvm/test/tools/llvm-readobj/ELF/relocation-errors.test index aefc0b0f0e16d..59cf7155e2eb5 100644 --- a/llvm/test/tools/llvm-readobj/ELF/relocation-errors.test +++ b/llvm/test/tools/llvm-readobj/ELF/relocation-errors.test @@ -6,28 +6,28 @@ # LLVM: Relocations [ # LLVM-NEXT: Section (3) .rel.text { -# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 1 in section 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file -# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 2 in section 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file +# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 1 in SHT_REL section with index 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file +# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 2 in SHT_REL section with index 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file # LLVM-NEXT: 0x2 R_X86_64_NONE - 0x0 -# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 4 in section 3: invalid section index: 255 -# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 5 in section 3: a section [index 2] has an invalid sh_name (0xfefefefe) offset which goes past the end of the section name string table +# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 4 in SHT_REL section with index 3: invalid section index: 255 +# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 5 in SHT_REL section with index 3: a section [index 2] has an invalid sh_name (0xfefefefe) offset which goes past the end of the section name string table # 
LLVM-NEXT: } # LLVM-NEXT: Section (4) .rela.text { -# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 1 in section 4: invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM +# LLVM-NEXT: warning: '[[FILE]]': unable to print relocation 1 in SHT_RELA section with index 4: invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM # LLVM-NEXT: } # LLVM-NEXT: ] # GNU: Relocation section '.rel.text' at offset 0x41 contains 5 entries: # GNU-NEXT: Offset Info Type Symbol's Value Symbol's Name -# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 1 in section 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file -# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 2 in section 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file +# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 1 in SHT_REL section with index 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file +# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 2 in SHT_REL section with index 3: unable to access section [index 6] data at 0x17e7e7e8b0: offset goes past the end of file # GNU-NEXT: 0000000000000002 0000000000000000 R_X86_64_NONE -# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 4 in section 3: invalid section index: 255 -# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 5 in section 3: a section [index 2] has an invalid sh_name (0xfefefefe) offset which goes past the end of the section name string table +# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 4 in SHT_REL section with index 3: invalid section index: 255 +# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 5 in SHT_REL section with index 3: a section [index 2] has an invalid sh_name (0xfefefefe) offset which goes past the end of the section name string table # GNU-EMPTY: # GNU-NEXT: Relocation section '.rela.text' at offset 
0x91 contains 1 entries: # GNU-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend -# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 1 in section 4: invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM +# GNU-NEXT: warning: '[[FILE]]': unable to print relocation 1 in SHT_RELA section with index 4: invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM --- !ELF FileHeader: diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index b81213d253d5c..ca0f93cc1d612 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -747,10 +747,10 @@ template class DumpStyle { function_ref OnSectionStart, function_ref OnSectionEntry); - virtual void printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rel &R, unsigned RelIndex) = 0; - virtual void printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rela &R, unsigned RelIndex) = 0; + virtual void printRelReloc(const Elf_Rel &R, unsigned RelIndex, + const Elf_Shdr *Sec, const Elf_Shdr *SymTab) = 0; + virtual void printRelaReloc(const Elf_Rela &R, unsigned RelIndex, + const Elf_Shdr *Sec, const Elf_Shdr *SymTab) = 0; virtual void printRelrReloc(const Elf_Relr &R) = 0; void printRelocationsHelper(const Elf_Shdr &Sec); @@ -863,15 +863,15 @@ template class GNUStyle : public DumpStyle { } void printHashedSymbol(const Elf_Sym *FirstSym, uint32_t Sym, StringRef StrTable, uint32_t Bucket); - void printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rel &R, unsigned RelIndex) override; - void printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rela &R, unsigned RelIndex) override; + void printRelReloc(const Elf_Rel &R, unsigned RelIndex, const Elf_Shdr *Sec, + const Elf_Shdr *SymTab) override; + void printRelaReloc(const Elf_Rela &R, unsigned RelIndex, const Elf_Shdr *Sec, + const Elf_Shdr *SymTab) override; void printRelrReloc(const Elf_Relr &R) 
override; template - void printRelRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const RelTy &R, unsigned RelIndex); + void printRelRelaReloc(const RelTy &R, unsigned RelIndex, const Elf_Shdr &Sec, + const Elf_Shdr *SymTab); template void printRelRelaReloc(const Elf_Sym *Sym, StringRef SymbolName, const RelTy &R); @@ -932,13 +932,13 @@ template class LLVMStyle : public DumpStyle { void printMipsABIFlags(const ELFObjectFile *Obj) override; private: - void printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rel &R, unsigned RelIndex) override; - void printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rela &R, unsigned RelIndex) override; + void printRelReloc(const Elf_Rel &R, unsigned RelIndex, const Elf_Shdr *Sec, + const Elf_Shdr *SymTab) override; + void printRelaReloc(const Elf_Rela &R, unsigned RelIndex, const Elf_Shdr *Sec, + const Elf_Shdr *SymTab) override; void printRelrReloc(const Elf_Relr &R) override; template - void printRelRelaReloc(unsigned SecIndex, const RelTy &Rel, unsigned RelIndex, + void printRelRelaReloc(const RelTy &R, unsigned RelIndex, const Elf_Shdr &Sec, const Elf_Shdr *SymTab); template void printDynamicRelocation(const RelTy &Rel); @@ -3601,15 +3601,17 @@ template void GNUStyle::printGroupSections() { } template -void GNUStyle::printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rel &R, unsigned RelIndex) { - printRelRelaReloc(SecIndex, SymTab, R, RelIndex); +void GNUStyle::printRelReloc(const Elf_Rel &R, unsigned RelIndex, + const Elf_Shdr *Sec, + const Elf_Shdr *SymTab) { + printRelRelaReloc(R, RelIndex, *Sec, SymTab); } template -void GNUStyle::printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rela &R, unsigned RelIndex) { - printRelRelaReloc(SecIndex, SymTab, R, RelIndex); +void GNUStyle::printRelaReloc(const Elf_Rela &R, unsigned RelIndex, + const Elf_Shdr *Sec, + const Elf_Shdr *SymTab) { + printRelRelaReloc(R, RelIndex, *Sec, SymTab); } template void 
GNUStyle::printRelrReloc(const Elf_Relr &R) { @@ -3618,15 +3620,15 @@ template void GNUStyle::printRelrReloc(const Elf_Relr &R) { template template -void GNUStyle::printRelRelaReloc(unsigned SecIndex, - const Elf_Shdr *SymTab, const RelTy &R, - unsigned RelIndex) { +void GNUStyle::printRelRelaReloc(const RelTy &R, unsigned RelIndex, + const Elf_Shdr &Sec, + const Elf_Shdr *SymTab) { Expected> Target = this->dumper()->getRelocationTarget(SymTab, R); if (!Target) this->reportUniqueWarning(createError( - "unable to print relocation " + Twine(RelIndex) + " in section " + - Twine(SecIndex) + ": " + toString(Target.takeError()))); + "unable to print relocation " + Twine(RelIndex) + " in " + + describe(this->Obj, Sec) + ": " + toString(Target.takeError()))); else printRelRelaReloc(/*Sym=*/Target->first, /*Name=*/Target->second, R); } @@ -5466,13 +5468,12 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { SymTab = *SymTabOrErr; } - unsigned SecNdx = &Sec - &cantFail(Obj.sections()).front(); unsigned RelNdx = 0; switch (Sec.sh_type) { case ELF::SHT_REL: if (Expected RangeOrErr = Obj.rels(&Sec)) { for (const Elf_Rel &R : *RangeOrErr) - printRelReloc(SecNdx, SymTab, R, ++RelNdx); + printRelReloc(R, ++RelNdx, &Sec, SymTab); } else { Warn(RangeOrErr.takeError()); } @@ -5480,7 +5481,7 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { case ELF::SHT_RELA: if (Expected RangeOrErr = Obj.relas(&Sec)) { for (const Elf_Rela &R : *RangeOrErr) - printRelaReloc(SecNdx, SymTab, R, ++RelNdx); + printRelaReloc(R, ++RelNdx, &Sec, SymTab); } else { Warn(RangeOrErr.takeError()); } @@ -5499,14 +5500,14 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { } for (const Elf_Rel &R : Obj.decode_relrs(*RangeOrErr)) - printRelReloc(SecNdx, /*SymTab=*/nullptr, R, ++RelNdx); + printRelReloc(R, ++RelNdx, &Sec, /*SymTab=*/nullptr); break; } case ELF::SHT_ANDROID_REL: case ELF::SHT_ANDROID_RELA: if (Expected> RelasOrErr = Obj.android_relas(&Sec)) { for (const 
Elf_Rela &R : *RelasOrErr) - printRelaReloc(SecNdx, SymTab, R, ++RelNdx); + printRelaReloc(R, ++RelNdx, &Sec, SymTab); } else { Warn(RelasOrErr.takeError()); } @@ -6146,15 +6147,17 @@ template void LLVMStyle::printRelocations() { } template -void LLVMStyle::printRelReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rel &R, unsigned RelIndex) { - printRelRelaReloc(SecIndex, R, RelIndex, SymTab); +void LLVMStyle::printRelReloc(const Elf_Rel &R, unsigned RelIndex, + const Elf_Shdr *Sec, + const Elf_Shdr *SymTab) { + printRelRelaReloc(R, RelIndex, *Sec, SymTab); } template -void LLVMStyle::printRelaReloc(unsigned SecIndex, const Elf_Shdr *SymTab, - const Elf_Rela &R, unsigned RelIndex) { - printRelRelaReloc(SecIndex, R, RelIndex, SymTab); +void LLVMStyle::printRelaReloc(const Elf_Rela &R, unsigned RelIndex, + const Elf_Shdr *Sec, + const Elf_Shdr *SymTab) { + printRelRelaReloc(R, RelIndex, *Sec, SymTab); } template void LLVMStyle::printRelrReloc(const Elf_Relr &R) { @@ -6163,15 +6166,15 @@ template void LLVMStyle::printRelrReloc(const Elf_Relr &R) { template template -void LLVMStyle::printRelRelaReloc(unsigned SecIndex, const RelTy &Rel, - unsigned RelIndex, +void LLVMStyle::printRelRelaReloc(const RelTy &Rel, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { Expected> Target = this->dumper()->getRelocationTarget(SymTab, Rel); if (!Target) { this->reportUniqueWarning(createError( - "unable to print relocation " + Twine(RelIndex) + " in section " + - Twine(SecIndex) + ": " + toString(Target.takeError()))); + "unable to print relocation " + Twine(RelIndex) + " in " + + describe(this->Obj, Sec) + ": " + toString(Target.takeError()))); return; } From 3a1308be051711473e4cdd4aa12c70070991f648 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 4 Sep 2020 11:17:02 +0100 Subject: [PATCH 182/465] MIRFormatter.h - remove MachineInstr.h include. NFC. Use forward declarations and include the inner dependencies directly. 
--- llvm/include/llvm/CodeGen/MIRFormatter.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h index e57c32c5ae614..9cb92091db50b 100644 --- a/llvm/include/llvm/CodeGen/MIRFormatter.h +++ b/llvm/include/llvm/CodeGen/MIRFormatter.h @@ -14,11 +14,15 @@ #ifndef LLVM_CODEGEN_MIRFORMATTER_H #define LLVM_CODEGEN_MIRFORMATTER_H -#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/raw_ostream.h" +#include namespace llvm { +class MachineFunction; +class MachineInstr; struct PerFunctionMIParsingState; struct SlotMapping; From 0faf3930c405c320d8ed210aa0b44ef608e208da Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 3 Sep 2020 17:45:08 +0300 Subject: [PATCH 183/465] [llvm-readelf/obj] - Use `RelSymbol` instead of std::pair. NFCI. We have the `RelSymbol` struct and can use it instead of `std::pair` in a few methods. This is a bit cleaner. Differential revision: https://reviews.llvm.org/D87092 --- llvm/tools/llvm-readobj/ELFDumper.cpp | 54 ++++++++++++++------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index ca0f93cc1d612..ab8b546a7b764 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -114,6 +114,13 @@ namespace { template class DumpStyle; +template struct RelSymbol { + RelSymbol(const typename ELFT::Sym *S, StringRef N) + : Sym(S), Name(N.str()) {} + const typename ELFT::Sym *Sym; + std::string Name; +}; + /// Represents a contiguous uniform range in the file. 
We cannot just create a /// range directly because when creating one of these from the .dynamic table /// the size, entity size and virtual address are different entries in arbitrary @@ -364,8 +371,8 @@ template class ELFDumper : public ObjDumper { getVersionDependencies(const Elf_Shdr *Sec) const; template - Expected> - getRelocationTarget(const Elf_Shdr *SymTab, const RelTy &R) const; + Expected> getRelocationTarget(const Elf_Shdr *SymTab, + const RelTy &R) const; std::function WarningHandler; void reportUniqueWarning(Error Err) const; @@ -873,8 +880,7 @@ template class GNUStyle : public DumpStyle { void printRelRelaReloc(const RelTy &R, unsigned RelIndex, const Elf_Shdr &Sec, const Elf_Shdr *SymTab); template - void printRelRelaReloc(const Elf_Sym *Sym, StringRef SymbolName, - const RelTy &R); + void printRelRelaReloc(const RelTy &R, const RelSymbol &RelSym); void printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, Optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed) override; @@ -1053,7 +1059,7 @@ Expected ELFDumper::getSymbolVersion(const Elf_Sym *Sym, template template -Expected> +Expected> ELFDumper::getRelocationTarget(const Elf_Shdr *SymTab, const RelTy &R) const { const ELFFile *Obj = ObjF->getELFFile(); @@ -1062,7 +1068,7 @@ ELFDumper::getRelocationTarget(const Elf_Shdr *SymTab, return SymOrErr.takeError(); const Elf_Sym *Sym = *SymOrErr; if (!Sym) - return std::make_pair(nullptr, ""); + return RelSymbol(nullptr, ""); // The st_name field of a STT_SECTION is usually 0 (empty string). // This code block returns the section name. @@ -1073,12 +1079,12 @@ ELFDumper::getRelocationTarget(const Elf_Shdr *SymTab, return SecOrErr.takeError(); // A section symbol describes the section at index 0. 
if (*SecOrErr == nullptr) - return std::make_pair(Sym, ""); + return RelSymbol(Sym, ""); Expected NameOrErr = Obj->getSectionName(*SecOrErr); if (!NameOrErr) return NameOrErr.takeError(); - return std::make_pair(Sym, NameOrErr->str()); + return RelSymbol(Sym, NameOrErr->str()); } Expected StrTableOrErr = Obj->getStringTableForSymtab(*SymTab); @@ -1087,7 +1093,7 @@ ELFDumper::getRelocationTarget(const Elf_Shdr *SymTab, std::string SymbolName = getFullSymbolName(Sym, *StrTableOrErr, SymTab->sh_type == SHT_DYNSYM); - return std::make_pair(Sym, SymbolName); + return RelSymbol(Sym, SymbolName); } static std::string maybeDemangle(StringRef Name) { @@ -3623,14 +3629,14 @@ template void GNUStyle::printRelRelaReloc(const RelTy &R, unsigned RelIndex, const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { - Expected> Target = + Expected> Target = this->dumper()->getRelocationTarget(SymTab, R); if (!Target) this->reportUniqueWarning(createError( "unable to print relocation " + Twine(RelIndex) + " in " + describe(this->Obj, Sec) + ": " + toString(Target.takeError()))); else - printRelRelaReloc(/*Sym=*/Target->first, /*Name=*/Target->second, R); + printRelRelaReloc(R, *Target); } template @@ -3645,8 +3651,8 @@ static Optional getAddend(const typename ELFT::Rel &) { template template -void GNUStyle::printRelRelaReloc(const Elf_Sym *Sym, StringRef SymbolName, - const RelTy &R) { +void GNUStyle::printRelRelaReloc(const RelTy &R, + const RelSymbol &RelSym) { // First two fields are bit width dependent. The rest of them are fixed width. unsigned Bias = ELFT::Is64Bits ? 
8 : 0; Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias}; @@ -3659,17 +3665,18 @@ void GNUStyle::printRelRelaReloc(const Elf_Sym *Sym, StringRef SymbolName, this->Obj.getRelocationTypeName(R.getType(this->Obj.isMips64EL()), RelocName); Fields[2].Str = RelocName.c_str(); - if (Sym) - Fields[3].Str = to_string(format_hex_no_prefix(Sym->getValue(), Width)); + if (RelSym.Sym) + Fields[3].Str = + to_string(format_hex_no_prefix(RelSym.Sym->getValue(), Width)); - Fields[4].Str = std::string(SymbolName); + Fields[4].Str = std::string(RelSym.Name); for (const Field &F : Fields) printField(F); std::string Addend; if (Optional A = getAddend(R)) { int64_t RelAddend = *A; - if (!SymbolName.empty()) { + if (!RelSym.Name.empty()) { if (RelAddend < 0) { Addend = " - "; RelAddend = std::abs(RelAddend); @@ -4349,10 +4356,6 @@ template void GNUStyle::printSectionMapping() { } namespace { -template struct RelSymbol { - const typename ELFT::Sym *Sym; - std::string Name; -}; template RelSymbol getSymbolForReloc(const ELFFile &Obj, StringRef FileName, @@ -4394,9 +4397,8 @@ RelSymbol getSymbolForReloc(const ELFFile &Obj, StringRef FileName, template template void GNUStyle::printDynamicRelocation(const RelTy &R) { - RelSymbol S = - getSymbolForReloc(this->Obj, this->FileName, this->dumper(), R); - printRelRelaReloc(S.Sym, S.Name, R); + printRelRelaReloc( + R, getSymbolForReloc(this->Obj, this->FileName, this->dumper(), R)); } template @@ -6169,7 +6171,7 @@ template void LLVMStyle::printRelRelaReloc(const RelTy &Rel, unsigned RelIndex, const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { - Expected> Target = + Expected> Target = this->dumper()->getRelocationTarget(SymTab, Rel); if (!Target) { this->reportUniqueWarning(createError( @@ -6178,7 +6180,7 @@ void LLVMStyle::printRelRelaReloc(const RelTy &Rel, unsigned RelIndex, return; } - std::string TargetName = Target->second; + std::string TargetName = Target->Name; SmallString<32> RelocName; 
this->Obj.getRelocationTypeName(Rel.getType(this->Obj.isMips64EL()), RelocName); From 294c0cc3ebad969819be4b5b8d091418b0704595 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 4 Sep 2020 11:29:59 +0100 Subject: [PATCH 184/465] [ARM] Fold predicate_cast(load) into vldr p0 This adds a simple tablegen pattern for folding predicate_cast(load) into vldr p0, providing the alignment and offset are correct. Differential Revision: https://reviews.llvm.org/D86702 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 11 +++ .../test/CodeGen/Thumb2/mve-pred-loadstore.ll | 68 +++++++------------ 2 files changed, 35 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index eda41e8eef065..75543093bcbfe 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -4382,6 +4382,10 @@ let Predicates = [HasMVEInt] in { // vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles. def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>; +def load_align4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() >= 4; +}]>; + let Predicates = [HasMVEInt] in { foreach VT = [ v4i1, v8i1, v16i1 ] in { def : Pat<(i32 (predicate_cast (VT VCCR:$src))), @@ -4394,6 +4398,13 @@ let Predicates = [HasMVEInt] in { (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>; } + // If we happen to be casting from a load we can convert that straight + // into a predicate load, so long as the load is of the correct type. + foreach VT = [ v4i1, v8i1, v16i1 ] in { + def : Pat<(VT (predicate_cast (i32 (load_align4 taddrmode_imm7<2>:$addr)))), + (VT (VLDR_P0_off taddrmode_imm7<2>:$addr))>; + } + // Here we match the specific SDNode type 'ARMVectorRegCastImpl' // rather than the more general 'ARMVectorRegCast' which would also // match some bitconverts. 
If we use the latter in cases where the diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll index b2ef0d41be6bf..944505321b244 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll @@ -386,18 +386,16 @@ define arm_aapcs_vfpcc <4 x i32> @load_predcastzext(i16* %i, <4 x i32> %a) { define arm_aapcs_vfpcc <4 x i32> @load_bc4(i32* %i, <4 x i32> %a) { ; CHECK-LE-LABEL: load_bc4: ; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vldr p0, [r0] ; CHECK-LE-NEXT: vmov.i32 q1, #0x0 -; CHECK-LE-NEXT: vmsr p0, r0 ; CHECK-LE-NEXT: vpsel q0, q0, q1 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: load_bc4: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: vldr p0, [r0] ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -410,19 +408,17 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4(i32* %i, <4 x i32> %a) { define arm_aapcs_vfpcc <8 x i16> @load_predcast8(i32* %i, <8 x i16> %a) { ; CHECK-LE-LABEL: load_predcast8: ; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: ldr r0, [r0] +; CHECK-LE-NEXT: vldr p0, [r0] ; CHECK-LE-NEXT: vmov.i32 q1, #0x0 -; CHECK-LE-NEXT: vmsr p0, r0 ; CHECK-LE-NEXT: vpsel q0, q0, q1 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: load_predcast8: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldr r0, [r0] ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vldr p0, [r0] ; CHECK-BE-NEXT: vrev32.16 q0, q0 -; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.16 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -435,19 +431,17 @@ define arm_aapcs_vfpcc <8 x i16> @load_predcast8(i32* %i, <8 x i16> %a) { define arm_aapcs_vfpcc <16 x i8> @load_predcast16(i32* %i, <16 x i8> %a) { ; CHECK-LE-LABEL: load_predcast16: ; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: ldr 
r0, [r0] +; CHECK-LE-NEXT: vldr p0, [r0] ; CHECK-LE-NEXT: vmov.i32 q1, #0x0 -; CHECK-LE-NEXT: vmsr p0, r0 ; CHECK-LE-NEXT: vpsel q0, q0, q1 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: load_predcast16: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldr r0, [r0] ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vldr p0, [r0] ; CHECK-BE-NEXT: vrev32.8 q0, q0 -; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.8 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -484,18 +478,18 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_align2(i32* %i, <4 x i32> %a) { define arm_aapcs_vfpcc <4 x i32> @load_bc4_offset(i16* %i, <4 x i32> %a) { ; CHECK-LE-LABEL: load_bc4_offset: ; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: ldr.w r0, [r0, #6] +; CHECK-LE-NEXT: adds r0, #6 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0 -; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vldr p0, [r0] ; CHECK-LE-NEXT: vpsel q0, q0, q1 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: load_bc4_offset: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldr.w r0, [r0, #6] +; CHECK-BE-NEXT: adds r0, #6 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vldr p0, [r0] ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -510,18 +504,16 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_offset(i16* %i, <4 x i32> %a) { define arm_aapcs_vfpcc <4 x i32> @load_bc4_range4(i32* %i, <4 x i32> %a) { ; CHECK-LE-LABEL: load_bc4_range4: ; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: ldr r0, [r0, #4] +; CHECK-LE-NEXT: vldr p0, [r0, #4] ; CHECK-LE-NEXT: vmov.i32 q1, #0x0 -; CHECK-LE-NEXT: vmsr p0, r0 ; CHECK-LE-NEXT: vpsel q0, q0, q1 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: load_bc4_range4: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldr r0, [r0, #4] +; CHECK-BE-NEXT: vldr p0, [r0, #4] ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; 
CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -535,18 +527,16 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_range4(i32* %i, <4 x i32> %a) { define arm_aapcs_vfpcc <4 x i32> @load_bc4_range(i32* %i, <4 x i32> %a) { ; CHECK-LE-LABEL: load_bc4_range: ; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: ldr.w r0, [r0, #508] +; CHECK-LE-NEXT: vldr p0, [r0, #508] ; CHECK-LE-NEXT: vmov.i32 q1, #0x0 -; CHECK-LE-NEXT: vmsr p0, r0 ; CHECK-LE-NEXT: vpsel q0, q0, q1 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: load_bc4_range: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldr.w r0, [r0, #508] +; CHECK-BE-NEXT: vldr p0, [r0, #508] ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -560,22 +550,16 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_range(i32* %i, <4 x i32> %a) { define arm_aapcs_vfpcc <4 x i32> @load_bc4_range2(i32* %i, <4 x i32> %a) { ; CHECK-LE-LABEL: load_bc4_range2: ; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: movw r1, #65028 +; CHECK-LE-NEXT: vldr p0, [r0, #-508] ; CHECK-LE-NEXT: vmov.i32 q1, #0x0 -; CHECK-LE-NEXT: movt r1, #65535 -; CHECK-LE-NEXT: ldr r0, [r0, r1] -; CHECK-LE-NEXT: vmsr p0, r0 ; CHECK-LE-NEXT: vpsel q0, q0, q1 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: load_bc4_range2: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: movw r1, #65028 +; CHECK-BE-NEXT: vldr p0, [r0, #-508] ; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movt r1, #65535 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: ldr r0, [r0, r1] -; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -589,18 +573,18 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_range2(i32* %i, <4 x i32> %a) { define arm_aapcs_vfpcc <4 x i32> @load_bc4_range3(i32* %i, <4 x i32> %a) { ; CHECK-LE-LABEL: load_bc4_range3: ; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: ldr.w r0, [r0, #512] +; CHECK-LE-NEXT: add.w r0, r0, #512 ; 
CHECK-LE-NEXT: vmov.i32 q1, #0x0 -; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vldr p0, [r0] ; CHECK-LE-NEXT: vpsel q0, q0, q1 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: load_bc4_range3: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: ldr.w r0, [r0, #512] +; CHECK-BE-NEXT: add.w r0, r0, #512 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vldr p0, [r0] ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -614,22 +598,18 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_range3(i32* %i, <4 x i32> %a) { define arm_aapcs_vfpcc <4 x i32> @load_bc4_range5(i32* %i, <4 x i32> %a) { ; CHECK-LE-LABEL: load_bc4_range5: ; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: movw r1, #65024 +; CHECK-LE-NEXT: sub.w r0, r0, #512 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0 -; CHECK-LE-NEXT: movt r1, #65535 -; CHECK-LE-NEXT: ldr r0, [r0, r1] -; CHECK-LE-NEXT: vmsr p0, r0 +; CHECK-LE-NEXT: vldr p0, [r0] ; CHECK-LE-NEXT: vpsel q0, q0, q1 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: load_bc4_range5: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: movw r1, #65024 +; CHECK-BE-NEXT: sub.w r0, r0, #512 ; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movt r1, #65535 +; CHECK-BE-NEXT: vldr p0, [r0] ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: ldr r0, [r0, r1] -; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: bx lr From 740625fecd1a4cd8e5521bd1c98627eca6f7565d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 4 Sep 2020 11:41:07 +0100 Subject: [PATCH 185/465] [X86] Make lowerShuffleAsLanePermuteAndPermute use sublanes on AVX2 Extends lowerShuffleAsLanePermuteAndPermute to search for opportunities to use vpermq (64-bit cross-lane shuffle) and vpermd (32-bit cross-lane shuffle) to get elements into the correct lane, in addition to the 128-bit full-lane permutes it previously searched for. 
This is especially helpful in cross-lane byte shuffles, where the alternative tends to be "vpshufb both lanes separately and blend them with a vpblendvb", which is very expensive, especially on Haswell where vpblendvb uses the same execution port as all the shuffles. Addresses PR47262 Patch By: @TellowKrinkle (TellowKrinkle) Differential Revision: https://reviews.llvm.org/D86429 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 119 ++-- llvm/test/CodeGen/X86/oddshuffles.ll | 66 +- .../CodeGen/X86/vector-shuffle-256-v16.ll | 584 ++++++++++-------- .../CodeGen/X86/vector-shuffle-256-v32.ll | 410 +++++------- .../CodeGen/X86/vector-shuffle-512-v32.ll | 12 +- .../X86/vector-shuffle-combining-avx2.ll | 4 +- .../CodeGen/X86/vector-shuffle-combining.ll | 65 +- 7 files changed, 628 insertions(+), 632 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7a97527ecdf25..517e6c0931805 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -15550,53 +15550,94 @@ static SDValue lowerShuffleAsLanePermuteAndPermute( int NumElts = VT.getVectorNumElements(); int NumLanes = VT.getSizeInBits() / 128; int NumEltsPerLane = NumElts / NumLanes; + bool CanUseSublanes = Subtarget.hasAVX2() && V2.isUndef(); + + /// Attempts to find a sublane permute with the given size + /// that gets all elements into their target lanes. + /// + /// If successful, fills CrossLaneMask and InLaneMask and returns true. + /// If unsuccessful, returns false and may overwrite InLaneMask. + auto getSublanePermute = [&](int NumSublanes) -> SDValue { + int NumSublanesPerLane = NumSublanes / NumLanes; + int NumEltsPerSublane = NumElts / NumSublanes; + + SmallVector CrossLaneMask; + SmallVector InLaneMask(NumElts, SM_SentinelUndef); + // CrossLaneMask but one entry == one sublane. 
+ SmallVector CrossLaneMaskLarge(NumSublanes, SM_SentinelUndef); - SmallVector SrcLaneMask(NumLanes, SM_SentinelUndef); - SmallVector PermMask(NumElts, SM_SentinelUndef); - - for (int i = 0; i != NumElts; ++i) { - int M = Mask[i]; - if (M < 0) - continue; + for (int i = 0; i != NumElts; ++i) { + int M = Mask[i]; + if (M < 0) + continue; - // Ensure that each lane comes from a single source lane. - int SrcLane = M / NumEltsPerLane; - int DstLane = i / NumEltsPerLane; - if (!isUndefOrEqual(SrcLaneMask[DstLane], SrcLane)) - return SDValue(); - SrcLaneMask[DstLane] = SrcLane; + int SrcSublane = M / NumEltsPerSublane; + int DstLane = i / NumEltsPerLane; - PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane); - } + // We only need to get the elements into the right lane, not sublane. + // So search all sublanes that make up the destination lane. + bool Found = false; + int DstSubStart = DstLane * NumSublanesPerLane; + int DstSubEnd = DstSubStart + NumSublanesPerLane; + for (int DstSublane = DstSubStart; DstSublane < DstSubEnd; ++DstSublane) { + if (!isUndefOrEqual(CrossLaneMaskLarge[DstSublane], SrcSublane)) + continue; - // Make sure we set all elements of the lane mask, to avoid undef propagation. - SmallVector LaneMask(NumElts, SM_SentinelUndef); - for (int DstLane = 0; DstLane != NumLanes; ++DstLane) { - int SrcLane = SrcLaneMask[DstLane]; - if (0 <= SrcLane) - for (int j = 0; j != NumEltsPerLane; ++j) { - LaneMask[(DstLane * NumEltsPerLane) + j] = - (SrcLane * NumEltsPerLane) + j; + Found = true; + CrossLaneMaskLarge[DstSublane] = SrcSublane; + int DstSublaneOffset = DstSublane * NumEltsPerSublane; + InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane; + break; } - } + if (!Found) + return SDValue(); + } - // If we're only shuffling a single lowest lane and the rest are identity - // then don't bother. - // TODO - isShuffleMaskInputInPlace could be extended to something like this. 
- int NumIdentityLanes = 0; - bool OnlyShuffleLowestLane = true; - for (int i = 0; i != NumLanes; ++i) { - if (isSequentialOrUndefInRange(PermMask, i * NumEltsPerLane, NumEltsPerLane, - i * NumEltsPerLane)) - NumIdentityLanes++; - else if (SrcLaneMask[i] != 0 && SrcLaneMask[i] != NumLanes) - OnlyShuffleLowestLane = false; - } - if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1)) + // Fill CrossLaneMask using CrossLaneMaskLarge. + narrowShuffleMaskElts(NumEltsPerSublane, CrossLaneMaskLarge, CrossLaneMask); + + if (!CanUseSublanes) { + // If we're only shuffling a single lowest lane and the rest are identity + // then don't bother. + // TODO - isShuffleMaskInputInPlace could be extended to something like + // this. + int NumIdentityLanes = 0; + bool OnlyShuffleLowestLane = true; + for (int i = 0; i != NumLanes; ++i) { + int LaneOffset = i * NumEltsPerLane; + if (isSequentialOrUndefInRange(InLaneMask, LaneOffset, NumEltsPerLane, + i * NumEltsPerLane)) + NumIdentityLanes++; + else if (CrossLaneMask[LaneOffset] != 0) + OnlyShuffleLowestLane = false; + } + if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1)) + return SDValue(); + } + + SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask); + return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT), + InLaneMask); + }; + + // First attempt a solution with full lanes. + if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes)) + return V; + + // The rest of the solutions use sublanes. + if (!CanUseSublanes) + return SDValue(); + + // Then attempt a solution with 64-bit sublanes (vpermq). + if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes * 2)) + return V; + + // If that doesn't work and we have fast variable shuffle, + // attempt 32-bit sublanes (vpermd). 
+ if (!Subtarget.hasFastVariableShuffle()) return SDValue(); - SDValue LanePermute = DAG.getVectorShuffle(VT, DL, V1, V2, LaneMask); - return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask); + return getSublanePermute(/*NumSublanes=*/NumLanes * 4); } /// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll index f5f8781d80213..732395ee0f2dd 100644 --- a/llvm/test/CodeGen/X86/oddshuffles.ll +++ b/llvm/test/CodeGen/X86/oddshuffles.ll @@ -1132,28 +1132,50 @@ define void @interleave_24i16_in(<24 x i16>* %p, <8 x i16>* %q1, <8 x i16>* %q2, ; AVX1-NEXT: vmovdqu %xmm2, 16(%rdi) ; AVX1-NEXT: retq ; -; AVX2-LABEL: interleave_24i16_in: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqu (%rsi), %xmm0 -; AVX2-NEXT: vmovdqu (%rdx), %xmm1 -; AVX2-NEXT: vmovdqu (%rcx), %xmm2 -; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 -; AVX2-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[0,1,u,u,6,7,2,3,u,u,8,9,4,5,u,u,16,17,u,u,22,23,18,19,u,u,24,25,20,21,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,0,1,u,u,u,u,2,3,u,u,u,u,4,5,u,u,22,23,u,u,u,u,24,25,u,u,u,u,26,27] -; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7],ymm4[8],ymm3[9],ymm4[10,11],ymm3[12],ymm4[13,14],ymm3[15] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = -; AVX2-NEXT: vpermd %ymm2, %ymm4, %ymm4 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255] -; AVX2-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 -; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,u,u,10,11,8,9,u,u,14,15,12,13,u,u] -; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3] -; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7] -; AVX2-NEXT: 
vmovdqu %xmm0, 32(%rdi) -; AVX2-NEXT: vmovdqu %ymm3, (%rdi) -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: interleave_24i16_in: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vmovdqu (%rsi), %xmm0 +; AVX2-SLOW-NEXT: vmovdqu (%rdx), %xmm1 +; AVX2-SLOW-NEXT: vmovdqu (%rcx), %xmm2 +; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[0,1,u,u,6,7,2,3,u,u,8,9,4,5,u,u,16,17,u,u,22,23,18,19,u,u,24,25,20,21,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[2,3,0,1] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,0,1,u,u,u,u,2,3,u,u,u,u,4,5,u,u,22,23,u,u,u,u,24,25,u,u,u,u,26,27] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7],ymm4[8],ymm3[9],ymm4[10,11],ymm3[12],ymm4[13,14],ymm3[15] +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm4 = +; AVX2-SLOW-NEXT: vpermd %ymm2, %ymm4, %ymm4 +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255] +; AVX2-SLOW-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 +; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,u,u,10,11,8,9,u,u,14,15,12,13,u,u] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7] +; AVX2-SLOW-NEXT: vmovdqu %xmm0, 32(%rdi) +; AVX2-SLOW-NEXT: vmovdqu %ymm3, (%rdi) +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: interleave_24i16_in: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqu (%rsi), %xmm0 +; AVX2-FAST-NEXT: vmovdqu (%rdx), %xmm1 +; AVX2-FAST-NEXT: vmovdqu (%rcx), %xmm2 +; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = +; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm4 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = [0,4,1,5,1,5,2,6] +; AVX2-FAST-NEXT: 
vpermd %ymm3, %ymm5, %ymm3 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[0,1,4,5,u,u,2,3,6,7,u,u,8,9,12,13,u,u,18,19,22,23,u,u,24,25,28,29,u,u,26,27] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255] +; AVX2-FAST-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 +; AVX2-FAST-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,u,u,10,11,8,9,u,u,14,15,12,13,u,u] +; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3] +; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7] +; AVX2-FAST-NEXT: vmovdqu %xmm0, 32(%rdi) +; AVX2-FAST-NEXT: vmovdqu %ymm3, (%rdi) +; AVX2-FAST-NEXT: vzeroupper +; AVX2-FAST-NEXT: retq ; ; XOP-LABEL: interleave_24i16_in: ; XOP: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll index bd5fddb1512dc..ec775e9155721 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -393,10 +393,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17] -; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: 
shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: @@ -416,10 +414,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17] -; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0 -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -439,10 +435,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00: @@ -462,10 +456,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; 
XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -482,19 +474,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] -; AVX2-FAST-NEXT: retq +; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # %bb.0: @@ -513,9 +497,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: 
shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -532,19 +515,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] -; AVX2-FAST-NEXT: retq +; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # %bb.0: @@ -563,9 +538,8 @@ define <16 x i16> 
@shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -584,7 +558,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; @@ -605,7 +579,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -625,7 +599,7 @@ define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; @@ -646,7 +620,7 @@ define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -666,7 +640,7 @@ define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; @@ -687,7 +661,7 @@ define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -707,8 +681,8 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] 
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,8,9,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -728,8 +702,8 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,8,9,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3420,9 +3394,8 @@ define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_1 ; ; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2] -; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5,14,15,16,17,18,19,16,17,18,19,20,21,22,23,20,21,22,23] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: @@ -3441,9 +3414,8 @@ define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2] -; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] -; XOPAVX2-NEXT: vpblendw 
{{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5,14,15,16,17,18,19,16,17,18,19,20,21,22,23,20,21,22,23] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3548,22 +3520,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3] -; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,u,u] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-FAST-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-FAST-NEXT: retq +; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] +; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: ; AVX512VL: # %bb.0: @@ -3582,11 +3543,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_0 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3] -; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17] -; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] -; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3616,10 +3574,9 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_1 ; ; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,u,u,16,17,16,17,16,17,16,17,24,25,24,25,24,25,u,u] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-FAST-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,0,1,0,1,0,1,4,5,4,5,4,5,8,9,16,17,16,17,16,17,16,17,20,21,20,21,20,21,20,21] ; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12: @@ -3662,9 +3619,8 @@ define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_1 ; ; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15] -; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: @@ -3683,9 +3639,8 @@ define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15] -; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3704,9 +3659,8 @@ define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_1 ; ; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: ; AVX2: # %bb.0: -; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: 
vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: @@ -3725,9 +3679,8 @@ define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3796,13 +3749,20 @@ define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = 
ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,4,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,4,5,4,5,4,5,8,9,16,17,16,17,16,17,16,17,20,21,20,21,20,21,20,21] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: ; AVX512VL: # %bb.0: @@ -4018,13 +3978,20 @@ define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = 
ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,2,3,8,9,18,19,16,17,22,23,20,21,22,23,20,21,18,19,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: ; AVX512VL: # %bb.0: @@ -4062,13 +4029,20 @@ define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,4,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,2,3,0,1,6,7,8,9,18,19,16,17,22,23,20,21,18,19,16,17,22,23,20,21] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: ; AVX512VL: # %bb.0: @@ -4106,13 +4080,20 @@ define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = 
<2,0,6,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,2,3,8,9,18,19,16,17,22,23,20,21,22,23,20,21,18,19,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: ; AVX512VL: # %bb.0: @@ -4150,13 +4131,20 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,4,5,4,5,0,1,0,1,4,5,4,5,8,9,16,17,20,21,20,21,16,17,16,17,20,21,20,21,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: ; AVX512VL: # %bb.0: @@ -4194,13 +4182,20 @@ define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,4,5,4,5,8,9,16,17,20,21,20,21,16,17,16,17,20,21,20,21,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: ; AVX512VL: # %bb.0: @@ -4364,12 +4359,19 @@ define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: 
shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7] -; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,0,5,7,6,4,5] +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,2,3,6,7,10,11,0,1,4,5,14,15,16,17,16,17,18,19,22,23,26,27,16,17,20,21,30,31] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: ; AVX512VL: # %bb.0: @@ -4406,13 +4408,20 @@ define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = 
ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,4,5,4,5,4,5,8,9,16,17,16,17,20,21,20,21,20,21,20,21,20,21,20,21] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4450,13 +4459,20 @@ define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa 
{{.*#+}} ymm1 = <2,0,6,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,0,1,0,1,0,1,8,9,16,17,16,17,20,21,20,21,16,17,16,17,16,17,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4494,13 +4510,20 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,4,5,4,5,4,5,8,9,16,17,20,21,20,21,16,17,20,21,20,21,20,21,20,21] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4538,13 +4561,20 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_0 ; AVX1-NEXT: 
vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,0,1,0,1,8,9,16,17,20,21,20,21,16,17,16,17,16,17,16,17,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: ; AVX512VL: # %bb.0: @@ -4593,9 +4623,9 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_1 ; ; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = 
ymm0[2,3,2,3] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,u,u,16,17,24,25,24,25,16,17,24,25,26,27,28,29,u,u] -; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,3,7,4,6,7,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,4,5,6,7,8,9,14,15,16,17,20,21,20,21,16,17,20,21,22,23,24,25,26,27] ; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15: @@ -4635,13 +4665,20 @@ define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,u,u,4,5,4,5,4,5,4,5,4,5,8,9,16,17,u,u,20,21,20,21,20,21,20,21,20,21,20,21] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4679,13 +4716,20 @@ define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,u,u,4,5,0,1,0,1,0,1,8,9,16,17,16,17,u,u,20,21,16,17,16,17,16,17,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4723,13 +4767,20 @@ define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: 
shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0 -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,0,1,4,5,0,1,0,1,0,1,8,9,u,u,16,17,16,17,20,21,16,17,16,17,16,17,16,17] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: ; AVX512VL: # %bb.0: @@ -4800,13 +4851,13 @@ define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_1 ; ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,2] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,0,1,2,3,4,5,14,15,16,17,18,19,24,25,26,27,16,17,18,19,20,21,30,31] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: ; AVX512VL: # 
%bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,8,9,4,5,6,11,12,13,8,9,12,13,14,11] ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; @@ -4821,8 +4872,8 @@ define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,2] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,0,1,2,3,4,5,14,15,16,17,18,19,24,25,26,27,16,17,18,19,20,21,30,31] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -4984,12 +5035,19 @@ define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_1 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7] -; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27] -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,0,6,5,7,4,6] +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[2,3,6,7,10,11,8,9,0,1,6,7,2,3,14,15,18,19,22,23,26,27,24,25,16,17,22,23,18,19,30,31] +; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: ; AVX512VL: # %bb.0: @@ -5028,9 +5086,10 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_2 ; ; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7,8,9,10],ymm2[11],ymm1[12,13,14,15] -; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] +; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: @@ -5051,9 +5110,10 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_2 ; ; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7,8,9,10],ymm2[11],ymm1[12,13,14,15] -; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; XOPAVX2-NEXT: 
vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5122,10 +5182,9 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_3 ; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: ; AVX2: # %bb.0: ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3] -; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6],ymm1[7] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: @@ -5147,10 +5206,9 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_3 ; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = 
ymm2[0,1,2],ymm1[3],ymm2[4,5,6],ymm1[7] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5235,9 +5293,9 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_3 ; ; AVX2-FAST-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,2,3,12,13,12,13,u,u,u,u,16,17,16,17,20,21,18,19,28,29,28,29,u,u,u,u] -; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6],ymm2[7] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,3,7,u,4,7,u,u> +; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,10,11,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,28,29,u,u,30,31,u,u] ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; AVX2-FAST-NEXT: retq @@ -5300,9 +5358,9 @@ define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_2 ; ; AVX2-FAST-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-FAST-NEXT: 
vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,0,1,u,u,2,3,u,u,24,25,u,u,26,27,u,u,16,17,u,u,18,19] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <2,0,4,u,6,4,u,u> +; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,10,11,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,28,29,u,u,30,31,u,u] ; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; AVX2-FAST-NEXT: retq @@ -5354,19 +5412,18 @@ define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_2 ; ; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7,8,9],ymm2[10],ymm1[11,12,13,14,15] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; AVX2-FAST-NEXT: 
vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7,8,9],ymm2[10],ymm1[11,12,13,14,15] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,4,5,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21] ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,u,u,u,u,6,7,4,5,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21,u,u,u,u] +; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21] ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; AVX2-FAST-NEXT: retq ; @@ -5389,11 +5446,11 @@ define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_2 ; ; XOPAVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7,8,9],ymm2[10],ymm1[11,12,13,14,15] -; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21] ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] -; XOPAVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7] +; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5412,9 +5469,10 @@ define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_1 ; ; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm2 = 
ymm0[2,3,2,3] -; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6,7,8,9,10],ymm2[11],ymm0[12,13,14,15] -; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] +; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: @@ -5436,9 +5494,10 @@ define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_1 ; ; XOPAVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] -; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6,7,8,9,10],ymm2[11],ymm0[12,13,14,15] -; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] +; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] +; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -5458,9 +5517,8 @@ define <16 x i16> 
@shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_1 ; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: ; AVX2: # %bb.0: ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3],ymm2[4,5,6],ymm0[7] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] ; AVX2-NEXT: retq ; @@ -5484,9 +5542,8 @@ define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_1 ; XOPAVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3],ymm2[4,5,6],ymm0[7] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -7293,16 +7350,13 @@ define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) { ; ; AVX2-FAST-LABEL: PR24935: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = 
zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX2-FAST-NEXT: vpor %ymm2, %ymm1, %ymm1 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,2,3,2,3,u,u,10,11,u,u,u,u,u,u,u,u,18,19,18,19,u,u,26,27,u,u,u,u,u,u] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u] -; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15] -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255] -; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,5,u,u,0,4,6,2> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[2,3],zero,zero,zero,zero,zero,zero,ymm0[6,7],zero,zero,ymm0[18,19,22,23],zero,zero,zero,zero,ymm0[26,27,28,29,16,17],zero,zero +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <5,6,3,0,0,6,4,u> +; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[2,3,0,1],zero,zero,ymm1[6,7,0,1,10,11],zero,zero,ymm1[12,13],zero,zero,zero,zero,ymm1[16,17,20,21],zero,zero,zero,zero,zero,zero,ymm1[24,25] +; AVX2-FAST-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX2-FAST-NEXT: retq ; ; AVX512VL-LABEL: PR24935: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index e06b75355f6fb..a7e65f10a3604 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ 
b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -800,21 +800,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastb %xmm0, %ymm1 -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16] -; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %ymm0 -; AVX512VLBW-NEXT: movl $-2147450880, %eax # imm = 0x80008000 -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1} +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -834,11 +827,8 @@ define <32 x 
i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastb %xmm0, %ymm1 -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -857,19 +847,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} 
ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-NEXT: movl $1, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1} -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -889,10 +874,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -911,17 +894,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm1 = <0,9,u,u,u,u,u,u,0,u,u,u,u,u,u,u> -; AVX512VLBW-NEXT: vpermw %ymm0, %ymm1, %ymm0 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -941,10 +921,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -963,17 +941,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_ ; ; 
AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm1 = <0,9,u,u,u,u,u,u,0,u,u,u,u,u,u,u> -; AVX512VLBW-NEXT: vpermw %ymm0, %ymm1, %ymm0 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -993,10 +968,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = 
ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -1013,33 +986,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-FAST-NEXT: retq -; -; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-SLOW: # %bb.0: -; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-SLOW-NEXT: retq +; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: retq ; -; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-FAST: # %bb.0: -; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-FAST-NEXT: retq +; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLVBMI: # %bb.0: @@ -1058,9 +1015,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -1077,33 +1033,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_ ; AVX1-NEXT: 
vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-FAST-NEXT: retq -; -; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-SLOW: # %bb.0: -; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-SLOW-NEXT: retq +; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: retq ; -; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-FAST: # %bb.0: -; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX512VLBW-FAST-NEXT: vpermd %ymm0, 
%ymm1, %ymm0 -; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-FAST-NEXT: retq +; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLVBMI: # %bb.0: @@ -1122,9 +1062,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -1141,33 +1080,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-FAST-NEXT: retq -; -; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-SLOW: # %bb.0: -; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-SLOW-NEXT: retq +; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: retq ; -; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-FAST: # %bb.0: -; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-FAST-NEXT: retq +; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = 
ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLVBMI: # %bb.0: @@ -1186,9 +1109,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -1205,33 +1127,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; 
AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-FAST-NEXT: retq -; -; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-SLOW: # %bb.0: -; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-SLOW-NEXT: retq +; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX2-NEXT: retq ; -; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: -; AVX512VLBW-FAST: # %bb.0: -; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u> -; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0 -; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-FAST-NEXT: retq +; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLVBMI: # %bb.0: @@ -1250,9 +1156,8 @@ define <32 x i8> 
@shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -1271,13 +1176,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1298,7 +1203,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: 
vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1318,13 +1223,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1345,7 +1250,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1365,13 +1270,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: 
vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1392,7 +1297,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1412,13 +1317,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; 
AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1439,7 +1344,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1459,13 +1364,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1486,7 +1391,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = 
ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1506,13 +1411,13 @@ define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1533,7 +1438,7 @@ define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1553,13 +1458,13 @@ define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vpermq {{.*#+}} 
ymm0 = ymm0[0,3,0,1] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; @@ -1580,7 +1485,7 @@ define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1600,16 +1505,14 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; AVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,0,0,0] -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; AVX512VLBW-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: @@ -1629,9 +1532,8 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_ ; ; XOPAVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1] -; XOPAVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -3360,34 +3262,61 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_ ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: -; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero -; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23] -; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: vpshufb 
{{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7] -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] -; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23] +; AVX2-SLOW-NEXT: vpor %ymm1, %ymm2, %ymm1 +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7] +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: retq ; -; AVX512VLBW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: -; AVX512VLBW: # 
%bb.0: -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] -; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7] -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13,u,u,3,3],zero,ymm1[8,u,u,u,12,1,u],zero,zero,ymm1[u,u,20,u,17,22],zero,zero,ymm1[16],zero,ymm1[27,u],zero,zero,zero,zero -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23] -; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 -; AVX512VLBW-NEXT: movl $134948620, %eax # imm = 0x80B270C -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1} -; AVX512VLBW-NEXT: retq +; AVX2-FAST-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero +; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23] +; AVX2-FAST-NEXT: vpor %ymm1, %ymm2, %ymm1 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <3,4,5,7,5,4,1,u> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0 +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,u,u,5,10,13,u,u,0,u,u,16,23,u,23,u,u,u,u,u,u,u,27,u,u,u,u] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = 
[255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX2-FAST-NEXT: retq +; +; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: +; AVX512VLBW-SLOW: # %bb.0: +; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] +; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7] +; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13,u,u,3,3],zero,ymm1[8,u,u,u,12,1,u],zero,zero,ymm1[u,u,20,u,17,22],zero,zero,ymm1[16],zero,ymm1[27,u],zero,zero,zero,zero +; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23] +; AVX512VLBW-SLOW-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512VLBW-SLOW-NEXT: movl $134948620, %eax # imm = 0x80B270C +; AVX512VLBW-SLOW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-SLOW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1} +; AVX512VLBW-SLOW-NEXT: retq +; +; AVX512VLBW-FAST-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: +; AVX512VLBW-FAST: # %bb.0: +; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <3,4,5,7,5,4,1,u> +; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm2 +; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13,u,u,3,3],zero,ymm1[8,u,u,u,12,1,u],zero,zero,ymm1[u,u,20,u,17,22],zero,zero,ymm1[16],zero,ymm1[27,u],zero,zero,zero,zero +; AVX512VLBW-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] +; 
AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23] +; AVX512VLBW-FAST-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512VLBW-FAST-NEXT: movl $134948620, %eax # imm = 0x80B270C +; AVX512VLBW-FAST-NEXT: kmovd %eax, %k1 +; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[u,u,0,1,u,u,u,u,5,10,13,u,u,0,u,u,16,23,u,23,u,u,u,u,u,u,u,27,u,u,u,u] +; AVX512VLBW-FAST-NEXT: retq + ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: ; AVX512VLVBMI: # %bb.0: @@ -4405,10 +4334,8 @@ define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_ ; ; AVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7],zero,zero,zero,zero,ymm0[24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7,24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31],zero,zero -; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: @@ -4427,10 +4354,8 @@ define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_ ; ; XOPAVX2-LABEL: 
shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7],zero,zero,zero,zero,ymm0[24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7,24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31],zero,zero -; XOPAVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] ; XOPAVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle @@ -4942,21 +4867,14 @@ define <32 x i8> @PR47262(<4 x i64> %a0) { ; ; AVX2-LABEL: PR47262: ; AVX2: # %bb.0: -; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7],zero,zero,zero,zero,ymm0[24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31] -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7,24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31],zero,zero -; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31] ; AVX2-NEXT: retq ; ; AVX512VLBW-LABEL: PR47262: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4,u,u,1,5,u,u,2,6,u,u,3,7,u,u,u,u,24,28,u,u,25,29,u,u,26,30,u,u,27,31] -; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512VLBW-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[u,u,0,4,u,u,1,5,u,u,2,6,u,u,3,7,24,28,u,u,25,29,u,u,26,30,u,u,27,31,u,u] -; AVX512VLBW-NEXT: movw $21930, %ax # imm = 0x55AA -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1} -; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31] ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: PR47262: @@ -4975,10 +4893,8 @@ define <32 x i8> @PR47262(<4 x i64> %a0) { ; ; XOPAVX2-LABEL: PR47262: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7],zero,zero,zero,zero,ymm0[24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31] -; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7,24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31],zero,zero -; XOPAVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31] ; XOPAVX2-NEXT: retq %t1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> %t2 = bitcast <4 x i64> %t1 to <32 x i8> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index 4c8073614d6dd..ac6701b383f25 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -66,18 +66,16 @@ define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_1 ; KNL-LABEL: shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38: ; KNL: ## %bb.0: ; KNL-NEXT: vextracti64x4 $1, %zmm0, 
%ymm2 -; KNL-NEXT: vpermq {{.*#+}} ymm3 = ymm2[2,3,0,1] -; KNL-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5],ymm2[6],ymm3[7],ymm2[8,9,10,11],ymm3[12,13],ymm2[14],ymm3[15] -; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[u,u,14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,u,u] -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm4 -; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm4[5,6,7],ymm0[8,9,10,11,12],ymm4[13,14,15] -; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17,u,u] +; KNL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,3] +; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[u,u,6,7,u,u,12,13,u,u,2,3,u,u,0,1,u,u,22,23,u,u,20,21,u,u,18,19,u,u,u,u] +; KNL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,3] +; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,u,u,4,5,u,u,2,3,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17,u,u] ; KNL-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7],ymm0[8],ymm3[9],ymm0[10],ymm3[11],ymm0[12],ymm3[13],ymm0[14],ymm3[15] ; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm1 ; KNL-NEXT: vpbroadcastw %xmm1, %ymm1 ; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5,6],ymm1[7],ymm3[8,9,10,11,12,13,14],ymm1[15] ; KNL-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7] -; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17] +; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,6,7,u,u,12,13,u,u,2,3,u,u,0,1,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17] ; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; KNL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index 785202c88b6da..a4d7b26ef8690 100644 --- 
a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -807,8 +807,8 @@ entry: define <32 x i8> @PR27320(<8 x i32> %a0) { ; CHECK-LABEL: PR27320: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1] -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,1,2,3,4,4,5,6,7,7,8,9,10,10,11,28,29,29,30,31,16,16,17,18,19,19,20,21,22,22,23] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,1,2,3,4,4,5,6,7,7,8,9,10,10,11,20,21,21,22,23,24,24,25,26,27,27,28,29,30,30,31] ; CHECK-NEXT: ret{{[l|q]}} %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> %2 = bitcast <8 x i32> %1 to <32 x i8> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index 5b3b1d4fba183..8bdd2451434e6 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -3248,56 +3248,21 @@ define void @PR45604(<32 x i16>* %dst, <8 x i16>* %src) { ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: PR45604: -; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa (%rsi), %xmm0 -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,0,0,0,0,0,0,0,0] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm1[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,0,2,1,4,4,6,5] -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = -; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm4 = [0,0,0,0,0,0,0,0,11,11,11,11,11,11,11,11] -; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm2, %ymm4, %ymm2 -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm5 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,1,3,4,5,5,7] -; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm6 = ymm0[2,3,0,1] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm7 = ymm6[2,0,3,1,4,5,6,7,10,8,11,9,12,13,14,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm7 = ymm7[0,1,1,3,4,5,5,7] -; 
AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm8 = <255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255,u,u,u,u> -; AVX2-SLOW-NEXT: vpblendvb %ymm8, %ymm5, %ymm7, %ymm5 -; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] -; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1 -; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,3,3,6,5,7,7] -; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm3 = ymm6[0,1,2,3,6,4,7,5,8,9,10,11,14,12,15,13] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[2,1,3,3,6,5,7,7] -; AVX2-SLOW-NEXT: vpblendvb %ymm8, %ymm0, %ymm3, %ymm0 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm5[0],ymm2[1],ymm5[2],ymm2[3],ymm5[4],ymm2[5],ymm5[6],ymm2[7] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX2-SLOW-NEXT: vmovdqu %ymm0, 32(%rdi) -; AVX2-SLOW-NEXT: vmovdqu %ymm2, (%rdi) -; AVX2-SLOW-NEXT: vzeroupper -; AVX2-SLOW-NEXT: retq -; -; AVX2-FAST-LABEL: PR45604: -; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm0 = -; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,11,11,11,11,11,11,11,11] -; AVX2-FAST-NEXT: vpblendvb %ymm0, {{.*}}(%rip), %ymm1, %ymm0 -; AVX2-FAST-NEXT: vmovdqa (%rsi), %xmm1 -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[0,1],zero,zero,ymm1[u,u,u,u,2,3],zero,zero,ymm1[u,u,u,u],zero,zero,ymm1[20,21,u,u,u,u],zero,zero,ymm1[22,23,u,u,u,u] -; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm3 = ymm1[2,3,0,1] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm4 = zero,zero,ymm3[0,1,u,u,u,u],zero,zero,ymm3[2,3,u,u,u,u,20,21],zero,zero,ymm3[u,u,u,u,22,23],zero,zero,ymm3[u,u,u,u] -; AVX2-FAST-NEXT: vpor %ymm4, %ymm2, %ymm2 -; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm0[1],ymm2[2],ymm0[3],ymm2[4],ymm0[5],ymm2[6],ymm0[7] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = 
ymm1[8,9],zero,zero,ymm1[u,u,u,u,10,11],zero,zero,ymm1[u,u,u,u],zero,zero,ymm1[28,29,u,u,u,u],zero,zero,ymm1[30,31,u,u,u,u] -; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm3 = zero,zero,ymm3[8,9,u,u,u,u],zero,zero,ymm3[10,11,u,u,u,u,28,29],zero,zero,ymm3[u,u,u,u,30,31],zero,zero,ymm3[u,u,u,u] -; AVX2-FAST-NEXT: vpor %ymm3, %ymm1, %ymm1 -; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] -; AVX2-FAST-NEXT: vmovdqu %ymm0, 32(%rdi) -; AVX2-FAST-NEXT: vmovdqu %ymm2, (%rdi) -; AVX2-FAST-NEXT: vzeroupper -; AVX2-FAST-NEXT: retq +; AVX2-LABEL: PR45604: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqa (%rsi), %xmm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,2,0,2] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15,u,u,u,u> +; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm3[1],ymm1[2],ymm3[3],ymm1[4],ymm3[5],ymm1[6],ymm3[7] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,1,3] +; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7] +; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi) +; AVX2-NEXT: vmovdqu %ymm1, (%rdi) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq %v1 = load <8 x i16>, <8 x i16>* %src, align 16 %v2 = shufflevector <8 x i16> %v1, <8 x i16> zeroinitializer, <16 x i32> %v3 = shufflevector <16 x i16> %v2, <16 x i16> , <32 x i32> From 7582c5c023a8d6bff224e80dc5ded916122d8c99 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 4 Sep 2020 12:16:48 +0100 Subject: [PATCH 186/465] CallingConvLower.h - remove unnecessary MachineFunction.h include. NFC. Reduce to forward declaration, add the Register.h include that we still needed, move CCState::ensureMaxAlignment into CallingConvLower.cpp as it was the only function that needed the full definition of MachineFunction. Fix a few implicit dependencies further down. 
--- llvm/include/llvm/CodeGen/CallingConvLower.h | 8 +++----- llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h | 4 ++-- llvm/lib/CodeGen/CallingConvLower.cpp | 6 ++++++ llvm/lib/Target/Mips/MipsCallLowering.h | 1 + 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 8ebe788ac360a..52c88d8af45b1 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -16,7 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" @@ -25,6 +25,7 @@ namespace llvm { class CCState; +class MachineFunction; class MVT; class TargetRegisterInfo; @@ -432,10 +433,7 @@ class CCState { return AllocateStack(Size, Align(Alignment)); } - void ensureMaxAlignment(Align Alignment) { - if (!AnalyzingMustTailForwardedRegs) - MF.getFrameInfo().ensureMaxAlignment(Alignment); - } + void ensureMaxAlignment(Align Alignment); /// Version of AllocateStack with extra register to be shadowed. 
LLVM_ATTRIBUTE_DEPRECATED(unsigned AllocateStack(unsigned Size, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index ef93042f6690d..dbd7e00c429aa 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -17,8 +17,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include @@ -30,11 +32,9 @@ class CallBase; class DataLayout; class Function; class MachineIRBuilder; -class MachineOperand; struct MachinePointerInfo; class MachineRegisterInfo; class TargetLowering; -class Type; class Value; class CallLowering { diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp index 3d8c2c8b00aa7..9662a583e3694 100644 --- a/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/llvm/lib/CodeGen/CallingConvLower.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -184,6 +185,11 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { } } +void CCState::ensureMaxAlignment(Align Alignment) { + if (!AnalyzingMustTailForwardedRegs) + MF.getFrameInfo().ensureMaxAlignment(Alignment); +} + static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { if (VT.isVector()) return true; // Assume -msse-regparm might be in effect. 
diff --git a/llvm/lib/Target/Mips/MipsCallLowering.h b/llvm/lib/Target/Mips/MipsCallLowering.h index a284cf5e26cf5..6e43e55cee9b1 100644 --- a/llvm/lib/Target/Mips/MipsCallLowering.h +++ b/llvm/lib/Target/Mips/MipsCallLowering.h @@ -18,6 +18,7 @@ namespace llvm { +class MachineMemOperand; class MipsTargetLowering; class MipsCallLowering : public CallLowering { From 2c9dbcda4f71497d4a58020bb093af438fb6e967 Mon Sep 17 00:00:00 2001 From: Vaibhav Garg Date: Fri, 4 Sep 2020 10:49:11 +0000 Subject: [PATCH 187/465] [modules] Correctly parse LateParsedTemplates in case of dependent modules. While parsing LateParsedTemplates, Clang assumes that the Global DeclID matches with the Local DeclID of a Decl. This is not the case when we have multiple dependent modules, each having their own LateParsedTemplate section. In such a case, a Local/Global DeclID confusion occurs which leads to improper casting of FunctionDecls. This commit creates a Vector to map the LateParsedTemplate section of each Module with their module file and therefore resolves the Global/Local DeclID confusion. Reviewed By: rsmith Differential Revision: https://reviews.llvm.org/D86514 --- clang/include/clang/Serialization/ASTReader.h | 5 +-- clang/lib/Serialization/ASTReader.cpp | 35 +++++++++++-------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index b6892e295ac7c..29c4f15e57b09 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -900,8 +900,9 @@ class ASTReader /// Delete expressions to analyze at the end of translation unit. SmallVector DelayedDeleteExprs; - // A list of late parsed template function data. - SmallVector LateParsedTemplates; + // A list of late parsed template function data with their module files. + SmallVector>, 4> + LateParsedTemplates; /// The IDs of all decls to be checked for deferred diags. 
/// diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 55d68a7c6919b..6f5fa67117c09 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3722,7 +3722,9 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } case LATE_PARSED_TEMPLATE: - LateParsedTemplates.append(Record.begin(), Record.end()); + LateParsedTemplates.emplace_back( + std::piecewise_construct, std::forward_as_tuple(&F), + std::forward_as_tuple(Record.begin(), Record.end())); break; case OPTIMIZE_PRAGMA_OPTIONS: @@ -8389,25 +8391,28 @@ void ASTReader::ReadPendingInstantiations( void ASTReader::ReadLateParsedTemplates( llvm::MapVector> &LPTMap) { - for (unsigned Idx = 0, N = LateParsedTemplates.size(); Idx < N; - /* In loop */) { - FunctionDecl *FD = cast(GetDecl(LateParsedTemplates[Idx++])); + for (auto &LPT : LateParsedTemplates) { + ModuleFile *FMod = LPT.first; + RecordDataImpl &LateParsed = LPT.second; + for (unsigned Idx = 0, N = LateParsed.size(); Idx < N; + /* In loop */) { + FunctionDecl *FD = + cast(GetLocalDecl(*FMod, LateParsed[Idx++])); - auto LT = std::make_unique(); - LT->D = GetDecl(LateParsedTemplates[Idx++]); + auto LT = std::make_unique(); + LT->D = GetLocalDecl(*FMod, LateParsed[Idx++]); - ModuleFile *F = getOwningModuleFile(LT->D); - assert(F && "No module"); + ModuleFile *F = getOwningModuleFile(LT->D); + assert(F && "No module"); - unsigned TokN = LateParsedTemplates[Idx++]; - LT->Toks.reserve(TokN); - for (unsigned T = 0; T < TokN; ++T) - LT->Toks.push_back(ReadToken(*F, LateParsedTemplates, Idx)); + unsigned TokN = LateParsed[Idx++]; + LT->Toks.reserve(TokN); + for (unsigned T = 0; T < TokN; ++T) + LT->Toks.push_back(ReadToken(*F, LateParsed, Idx)); - LPTMap.insert(std::make_pair(FD, std::move(LT))); + LPTMap.insert(std::make_pair(FD, std::move(LT))); + } } - - LateParsedTemplates.clear(); } void ASTReader::LoadSelector(Selector Sel) { From 
101f37a1b330e3f0ae57762db47bba28f72cf50d Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Fri, 4 Sep 2020 12:29:40 +0200 Subject: [PATCH 188/465] [lldb][NFC] Rewrite CPP11EnumTypes test to make it faster TestCPP11EnumTypes is one of the most expensive tests on my system and takes around 35 seconds to run. A relatively large amount of that time is actually doing CPU intensive work it seems (and not waiting on timeouts like other slow tests). The main issue is that this test repeatedly compiles the same source files with different compiler defines. The test is also including standard library headers, so it will also build all system modules with the gmodules debug info variant. This leads to the problem that this test ends up compiling all system Clang modules 8 times (one for each subtest with a unique define). As the system modules are quite large, this causes this test to spend most of its runtime just recompiling all system modules on macOS. There is also the small issue that this test is starting and start-stopping the test process a few hundred times. This rewrites the test to instead just use a macro to instantiate all the enum types in a single source and uses global variables to test the values (which means there is no more need to continue/stop or even start a process). I kept running all the debug info variants (even though it doesn't seem really relevant) to keep this as NFC as possible. This reduced the test runtime by around 1.5 seconds on my system (or in relative numbers, the runtime of this test decreases by 95%). 
--- .../lang/cpp/enum_types/TestCPP11EnumTypes.py | 184 ++++-------------- lldb/test/API/lang/cpp/enum_types/main.cpp | 63 +++--- 2 files changed, 66 insertions(+), 181 deletions(-) diff --git a/lldb/test/API/lang/cpp/enum_types/TestCPP11EnumTypes.py b/lldb/test/API/lang/cpp/enum_types/TestCPP11EnumTypes.py index c58f700039eb3..d40eee0cb1b0e 100644 --- a/lldb/test/API/lang/cpp/enum_types/TestCPP11EnumTypes.py +++ b/lldb/test/API/lang/cpp/enum_types/TestCPP11EnumTypes.py @@ -1,7 +1,5 @@ """Look up enum type information and check for correct display.""" - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -12,145 +10,45 @@ class CPP11EnumTypesTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_int8_t(self): - """Test C++11 enumeration class types as int8_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DSIGNED_ENUM_CLASS_TYPE=int8_t"'}) - self.image_lookup_for_enum_type(True) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_int16_t(self): - """Test C++11 enumeration class types as int16_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DSIGNED_ENUM_CLASS_TYPE=int16_t"'}) - self.image_lookup_for_enum_type(True) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_int32_t(self): - """Test C++11 enumeration class types as int32_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DSIGNED_ENUM_CLASS_TYPE=int32_t"'}) - self.image_lookup_for_enum_type(True) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_int64_t(self): - """Test C++11 enumeration class types as int64_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': 
'"-DSIGNED_ENUM_CLASS_TYPE=int64_t"'}) - self.image_lookup_for_enum_type(True) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_uint8_t(self): - """Test C++11 enumeration class types as uint8_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DUNSIGNED_ENUM_CLASS_TYPE=uint8_t"'}) - self.image_lookup_for_enum_type(False) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_uint16_t(self): - """Test C++11 enumeration class types as uint16_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DUNSIGNED_ENUM_CLASS_TYPE=uint16_t"'}) - self.image_lookup_for_enum_type(False) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_uint32_t(self): - """Test C++11 enumeration class types as uint32_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DUNSIGNED_ENUM_CLASS_TYPE=uint32_t"'}) - self.image_lookup_for_enum_type(False) - - @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') - @skipIf(dwarf_version=['<', '4']) - def test_uint64_t(self): - """Test C++11 enumeration class types as uint64_t types.""" - self.build( - dictionary={ - 'CFLAGS_EXTRAS': '"-DUNSIGNED_ENUM_CLASS_TYPE=uint64_t"'}) - self.image_lookup_for_enum_type(False) - - def setUp(self): - # Call super's setUp(). - TestBase.setUp(self) - # Find the line number to break inside main(). - self.line = line_number('main.cpp', '// Set break point at this line.') - - def image_lookup_for_enum_type(self, is_signed): - """Test C++11 enumeration class types.""" - exe = self.getBuildArtifact("a.out") - self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - - # Break inside the main. 
- bkpt_id = lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line, num_expected_locations=1, loc_exact=True) - - self.runCmd("run", RUN_SUCCEEDED) - - # The stop reason of the thread should be breakpoint. - self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, - substrs=['stopped', - 'stop reason = breakpoint']) - - # The breakpoint should have a hit count of 1. - self.expect("breakpoint list -f", BREAKPOINT_HIT_ONCE, - substrs=[' resolved, hit count = 1']) - - # Look up information about the 'DayType' enum type. - # Check for correct display. - self.expect("image lookup -t DayType", DATA_TYPES_DISPLAYED_CORRECTLY, - patterns=['enum( struct| class) DayType {'], - substrs=['Monday', - 'Tuesday', - 'Wednesday', - 'Thursday', - 'Friday', - 'Saturday', - 'Sunday', - 'kNumDays', - '}']) - - if is_signed: - enum_values = ['-4', - 'Monday', - 'Tuesday', - 'Wednesday', - 'Thursday', - 'Friday', - 'Saturday', - 'Sunday', - 'kNumDays', - '5'] + def check_enum(self, suffix): + """ + :param suffix The suffix of the enum type name (enum_) that + should be checked. + :param test_values A list of integet values that shouldn't be converted + to any valid enum case. + """ + enum_name = "enum_" + suffix + unsigned = suffix.startswith("u") + + self.expect("image lookup -t " + enum_name, + patterns=["enum( struct| class) " + enum_name + " {"], + substrs=["Case1", + "Case2", + "Case3"]) + # Test each case in the enum. 
+ self.expect_expr("var1_" + suffix, result_type=enum_name, result_value="Case1") + self.expect_expr("var2_" + suffix, result_type=enum_name, result_value="Case2") + self.expect_expr("var3_" + suffix, result_type=enum_name, result_value="Case3") + + if unsigned: + self.expect_expr("var_below_" + suffix, result_type=enum_name, result_value="199") + self.expect_expr("var_above_" + suffix, result_type=enum_name, result_value="203") else: - enum_values = ['199', - 'Monday', - 'Tuesday', - 'Wednesday', - 'Thursday', - 'Friday', - 'Saturday', - 'Sunday', - 'kNumDays', - '208'] - - bkpt = self.target().FindBreakpointByID(bkpt_id) - for enum_value in enum_values: - self.expect( - "frame variable day", - 'check for valid enumeration value', - substrs=[enum_value]) - lldbutil.continue_to_breakpoint(self.process(), bkpt) + self.expect_expr("var_below_" + suffix, result_type=enum_name, result_value="-3") + self.expect_expr("var_above_" + suffix, result_type=enum_name, result_value="1") + + @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr36527') + @skipIf(dwarf_version=['<', '4']) + def test(self): + self.build() + target = self.dbg.CreateTarget(self.getBuildArtifact("a.out")) + self.check_enum("uc") + self.check_enum("c") + self.check_enum("us") + self.check_enum("s") + self.check_enum("ui") + self.check_enum("i") + self.check_enum("ul") + self.check_enum("l") + self.check_enum("ull") + self.check_enum("ll") diff --git a/lldb/test/API/lang/cpp/enum_types/main.cpp b/lldb/test/API/lang/cpp/enum_types/main.cpp index d7d428a24432b..be895208c7d40 100644 --- a/lldb/test/API/lang/cpp/enum_types/main.cpp +++ b/lldb/test/API/lang/cpp/enum_types/main.cpp @@ -1,41 +1,28 @@ -#include -#include +#define DEFINE_UNSIGNED_ENUM(suffix, enum_type) \ + enum class enum_##suffix : enum_type{Case1 = 200, Case2, Case3}; \ + enum_##suffix var1_##suffix = enum_##suffix ::Case1; \ + enum_##suffix var2_##suffix = enum_##suffix ::Case2; \ + enum_##suffix var3_##suffix = enum_##suffix 
::Case3; \ + enum_##suffix var_below_##suffix = static_cast(199); \ + enum_##suffix var_above_##suffix = static_cast(203); +#define DEFINE_SIGNED_ENUM(suffix, enum_type) \ + enum class enum_##suffix : enum_type{Case1 = -2, Case2, Case3}; \ + enum_##suffix var1_##suffix = enum_##suffix ::Case1; \ + enum_##suffix var2_##suffix = enum_##suffix ::Case2; \ + enum_##suffix var3_##suffix = enum_##suffix ::Case3; \ + enum_##suffix var_below_##suffix = static_cast(-3); \ + enum_##suffix var_above_##suffix = static_cast(1); -int main (int argc, char const *argv[]) -{ -#ifdef SIGNED_ENUM_CLASS_TYPE - typedef SIGNED_ENUM_CLASS_TYPE enum_integer_t; - enum class DayType : enum_integer_t { - Monday = -3, - Tuesday, - Wednesday, - Thursday, - Friday, - Saturday, - Sunday, - kNumDays - }; - enum_integer_t day_value; -#else - typedef UNSIGNED_ENUM_CLASS_TYPE enum_integer_t; - enum class DayType : enum_integer_t { - Monday = 200, - Tuesday, - Wednesday, - Thursday, - Friday, - Saturday, - Sunday, - kNumDays - }; - enum_integer_t day_value; -#endif +DEFINE_UNSIGNED_ENUM(uc, unsigned char) +DEFINE_SIGNED_ENUM(c, signed char) +DEFINE_UNSIGNED_ENUM(us, unsigned short int) +DEFINE_SIGNED_ENUM(s, signed short int) +DEFINE_UNSIGNED_ENUM(ui, unsigned int) +DEFINE_SIGNED_ENUM(i, signed int) +DEFINE_UNSIGNED_ENUM(ul, unsigned long) +DEFINE_SIGNED_ENUM(l, signed long) +DEFINE_UNSIGNED_ENUM(ull, unsigned long long) +DEFINE_SIGNED_ENUM(ll, signed long long) - for (day_value = (enum_integer_t)DayType::Monday - 1; day_value <= (enum_integer_t)DayType::kNumDays + 1; ++day_value) - { - DayType day = (DayType)day_value; - printf("day as int is %i\n", (int)day); // Set break point at this line. 
- } - return 0; // Break here for char tests -} +int main(int argc, char const *argv[]) { return 0; } From 691d436685fa2394b088a9e4726c075027ac9c51 Mon Sep 17 00:00:00 2001 From: Yang Zhihui Date: Fri, 4 Sep 2020 05:15:59 -0700 Subject: [PATCH 189/465] Fix typos in doc LangRef.rst Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D87077 --- llvm/docs/LangRef.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index ce791b3aa9d48..2bd7d09f698db 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -6885,7 +6885,7 @@ where the first ``param`` is the number of the parameter it describes, which can be accessed by the function. This range does not include accesses by function calls from ``calls`` list. -where each ``Callee`` decribes how parameter is forwared into other +where each ``Callee`` describes how parameter is forwarded into other functions and looks like: .. code-block:: text @@ -15952,8 +15952,8 @@ Arguments: """""""""" The first argument ``%Ptr`` is a pointer type to the returned vector type, and -correponds to the start address to load from. The second argument ``%Stride`` -is a postive, constant integer with ``%Stride >= ``. ``%Stride`` is used +corresponds to the start address to load from. The second argument ``%Stride`` +is a positive, constant integer with ``%Stride >= ``. ``%Stride`` is used to compute the column memory addresses. I.e., for a column ``C``, its start memory addresses is calculated with ``%Ptr + C * %Stride``. The third Argument ```` is a boolean value. The fourth and fifth arguments, From 8784e9016d3d586dca90d6dd24fe663ce2e096ae Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Fri, 4 Sep 2020 19:06:14 +0700 Subject: [PATCH 190/465] [Test] Range fix in test test02_neg is not testing what it claims to test because its starting value -1 lies outside of specified range. 
--- llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll index 6a9713410200e..988b3923263f6 100644 --- a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll +++ b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll @@ -92,7 +92,7 @@ define i32 @test_02(i32* %p) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[RC:%.*]] = icmp sgt i32 [[IV_NEXT]], [[LEN]] ; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: @@ -132,7 +132,7 @@ define i32 @test_02_neg(i32* %p) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[RC:%.*]] = icmp sgt i32 [[IV_NEXT]], [[LEN]] ; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: @@ -166,4 +166,4 @@ exit: !0 = !{i32 0, i32 2147483647} -!1 = !{i32 -2147483648, i32 -1} +!1 = !{i32 -2147483648, i32 0} From 4d536c6da55d662b7aad8da71815d8cba7d44967 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 3 Sep 2020 16:26:59 -0400 Subject: [PATCH 191/465] [x86] add tests for store merging; NFC --- llvm/test/CodeGen/X86/stores-merging.ll | 66 +++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll index 85a086503410e..6d6796d1c902d 100644 --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -479,6 +479,39 @@ define void 
@trunc_i32_to_i16(i32 %x, i16* %p) { ret void } +define void @be_i32_to_i16(i32 %x, i16* %p0) { +; CHECK-LABEL: be_i32_to_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: movw %di, 2(%rsi) +; CHECK-NEXT: shrl $16, %edi +; CHECK-NEXT: movw %di, (%rsi) +; CHECK-NEXT: retq + %sh1 = lshr i32 %x, 16 + %t0 = trunc i32 %x to i16 + %t1 = trunc i32 %sh1 to i16 + %p1 = getelementptr inbounds i16, i16* %p0, i64 1 + store i16 %t0, i16* %p1, align 2 + store i16 %t1, i16* %p0, align 2 + ret void +} + +define void @be_i32_to_i16_order(i32 %x, i16* %p0) { +; CHECK-LABEL: be_i32_to_i16_order: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: movw %ax, (%rsi) +; CHECK-NEXT: movw %di, 2(%rsi) +; CHECK-NEXT: retq + %sh1 = lshr i32 %x, 16 + %t0 = trunc i32 %x to i16 + %t1 = trunc i32 %sh1 to i16 + %p1 = getelementptr inbounds i16, i16* %p0, i64 1 + store i16 %t1, i16* %p0, align 2 + store i16 %t0, i16* %p1, align 2 + ret void +} + define void @trunc_i64_to_i8(i64 %x, i8* %p) { ; CHECK-LABEL: trunc_i64_to_i8: ; CHECK: # %bb.0: @@ -552,3 +585,36 @@ define void @trunc_i64_to_i32(i64 %x, i32* %p) { store i32 %t2, i32* %p1, align 4 ret void } + +define void @be_i64_to_i32(i64 %x, i32* %p0) { +; CHECK-LABEL: be_i64_to_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, 4(%rsi) +; CHECK-NEXT: shrq $32, %rdi +; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: retq + %sh1 = lshr i64 %x, 32 + %t0 = trunc i64 %x to i32 + %t1 = trunc i64 %sh1 to i32 + %p1 = getelementptr inbounds i32, i32* %p0, i64 1 + store i32 %t0, i32* %p1, align 4 + store i32 %t1, i32* %p0, align 4 + ret void +} + +define void @be_i64_to_i32_order(i64 %x, i32* %p0) { +; CHECK-LABEL: be_i64_to_i32_order: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movl %eax, (%rsi) +; CHECK-NEXT: movl %edi, 4(%rsi) +; CHECK-NEXT: retq + %sh1 = lshr i64 %x, 32 + %t0 = trunc i64 %x to i32 + %t1 = trunc i64 %sh1 to i32 + %p1 = getelementptr inbounds i32, i32* %p0, i64 
1 + store i32 %t1, i32* %p0, align 4 + store i32 %t0, i32* %p1, align 4 + ret void +} From 6bc5e866bdba03fdd2d92bd72f3fce9c2973bf1f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Sep 2020 13:58:59 +0100 Subject: [PATCH 192/465] [MemCpyOpt] Account for case that MemInsertPoint == BI. In that case, the new MemoryDef needs to be inserted *before* MemInsertPoint. --- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 8 ++++-- .../Transforms/MemCpyOpt/preserve-memssa.ll | 27 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index aff08a2c366c9..9011e36619d94 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -423,8 +423,12 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, if (MSSAU) { assert(LastMemDef && MemInsertPoint && "Both LastMemDef and MemInsertPoint need to be set"); - auto *NewDef = cast( - MSSAU->createMemoryAccessAfter(AMemSet, LastMemDef, MemInsertPoint)); + auto *NewDef = + cast(MemInsertPoint->getMemoryInst() == &*BI + ? 
MSSAU->createMemoryAccessBefore( + AMemSet, LastMemDef, MemInsertPoint) + : MSSAU->createMemoryAccessAfter( + AMemSet, LastMemDef, MemInsertPoint)); MSSAU->insertDef(NewDef, /*RenameUses=*/true); LastMemDef = NewDef; MemInsertPoint = NewDef; diff --git a/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll b/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll index 4e449ed5ff4f9..f3c54579d9d0e 100644 --- a/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll +++ b/llvm/test/Transforms/MemCpyOpt/preserve-memssa.ll @@ -123,6 +123,33 @@ entry: ret i8 %0 } +define void @test7([4 x i32]* %ptr) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[PTR:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[PTR]], i64 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[PTR]], i64 0, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[PTR]], i64 0, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP4]], i8 0, i64 16, i1 false) +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: ret void +; +entry: + %0 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 0, i32 0 + store i32 0, i32* %0, align 1 + %1 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 0, i32 1 + store i32 0, i32* %1, align 1 + %2 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 0, i32 2 + store i32 0, i32* %2, align 1 + %3 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 0, i32 3 + store i32 0, i32* %3, align 1 + call void @clobber() + ret void +} + +declare void @clobber() + ; Function Attrs: argmemonly nounwind willreturn declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0 From c633842f134171f31ace7e64088045f3a4975a88 Mon Sep 17 00:00:00 2001 From: Marius Brehler Date: Fri, 4 Sep 2020 10:39:30 +0200 Subject: [PATCH 
193/465] [mlir] Fix includes in mlir-translate Drops the include on InitAllDialects.h, as dialects are now initialized in the translation passes. Differential Revision: https://reviews.llvm.org/D87129 --- mlir/tools/mlir-translate/mlir-translate.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/tools/mlir-translate/mlir-translate.cpp b/mlir/tools/mlir-translate/mlir-translate.cpp index cf84856ddb849..06ca10f51b9b7 100644 --- a/mlir/tools/mlir-translate/mlir-translate.cpp +++ b/mlir/tools/mlir-translate/mlir-translate.cpp @@ -11,13 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "mlir/InitAllDialects.h" #include "mlir/InitAllTranslations.h" +#include "mlir/Support/LogicalResult.h" #include "mlir/Translation.h" using namespace mlir; - namespace mlir { // Defined in the test directory, no public header. void registerTestRoundtripSPIRV(); From 4f7cdc10a83ce605a86f9d84bf6b11182c886ba3 Mon Sep 17 00:00:00 2001 From: Marius Brehler Date: Fri, 4 Sep 2020 10:52:25 +0200 Subject: [PATCH 194/465] [mlir] Refactor standalone-translate to use mlirTranslateMain() This refactors the standalone-translate executable to use mlirTranslateMain() declared in Translation.h and further applies D87129. 
Reviewed By: jpienaar Differential Revision: https://reviews.llvm.org/D87131 --- .../standalone-translate.cpp | 89 +------------------ 1 file changed, 2 insertions(+), 87 deletions(-) diff --git a/mlir/examples/standalone/standalone-translate/standalone-translate.cpp b/mlir/examples/standalone/standalone-translate/standalone-translate.cpp index 7ccad3b4ee2be..f2f0ac56360da 100644 --- a/mlir/examples/standalone/standalone-translate/standalone-translate.cpp +++ b/mlir/examples/standalone/standalone-translate/standalone-translate.cpp @@ -11,102 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "mlir/IR/AsmState.h" -#include "mlir/IR/Diagnostics.h" -#include "mlir/IR/MLIRContext.h" -#include "mlir/InitAllDialects.h" #include "mlir/InitAllTranslations.h" -#include "mlir/Support/FileUtilities.h" #include "mlir/Support/LogicalResult.h" -#include "mlir/Support/ToolUtilities.h" #include "mlir/Translation.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/ToolOutputFile.h" #include "Standalone/StandaloneDialect.h" -static llvm::cl::opt inputFilename(llvm::cl::Positional, - llvm::cl::desc(""), - llvm::cl::init("-")); - -static llvm::cl::opt - outputFilename("o", llvm::cl::desc("Output filename"), - llvm::cl::value_desc("filename"), llvm::cl::init("-")); - -static llvm::cl::opt - splitInputFile("split-input-file", - llvm::cl::desc("Split the input file into pieces and " - "process each chunk independently"), - llvm::cl::init(false)); - -static llvm::cl::opt verifyDiagnostics( - "verify-diagnostics", - llvm::cl::desc("Check that emitted diagnostics match " - "expected-* lines on the corresponding line"), - llvm::cl::init(false)); - int main(int argc, char **argv) { mlir::registerAllTranslations(); // TODO: Register standalone translations here. - llvm::InitLLVM y(argc, argv); - - // Add flags for all the registered translations. 
- llvm::cl::opt - translationRequested("", llvm::cl::desc("Translation to perform"), - llvm::cl::Required); - mlir::registerAsmPrinterCLOptions(); - mlir::registerMLIRContextCLOptions(); - llvm::cl::ParseCommandLineOptions(argc, argv, "MLIR translation driver\n"); - - std::string errorMessage; - auto input = mlir::openInputFile(inputFilename, &errorMessage); - if (!input) { - llvm::errs() << errorMessage << "\n"; - return 1; - } - - auto output = mlir::openOutputFile(outputFilename, &errorMessage); - if (!output) { - llvm::errs() << errorMessage << "\n"; - return 1; - } - - // Processes the memory buffer with a new MLIRContext. - auto processBuffer = [&](std::unique_ptr ownedBuffer, - llvm::raw_ostream &os) { - mlir::MLIRContext context; - context.allowUnregisteredDialects(); - context.printOpOnDiagnostic(!verifyDiagnostics); - llvm::SourceMgr sourceMgr; - sourceMgr.AddNewSourceBuffer(std::move(ownedBuffer), llvm::SMLoc()); - - if (!verifyDiagnostics) { - mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context); - return (*translationRequested)(sourceMgr, os, &context); - } - - // In the diagnostic verification flow, we ignore whether the translation - // failed (in most cases, it is expected to fail). Instead, we check if the - // diagnostics were produced as expected. 
- mlir::SourceMgrDiagnosticVerifierHandler sourceMgrHandler(sourceMgr, - &context); - (*translationRequested)(sourceMgr, os, &context); - return sourceMgrHandler.verify(); - }; - - if (splitInputFile) { - if (failed(mlir::splitAndProcessBuffer(std::move(input), processBuffer, - output->os()))) - return 1; - } else { - if (failed(processBuffer(std::move(input), output->os()))) - return 1; - } - - output->keep(); - return 0; + return failed( + mlir::mlirTranslateMain(argc, argv, "MLIR Translation Testing Tool")); } From 6cb54cfe0b40216f0cac03f1f70a5b0ba328d5bb Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Sep 2020 09:48:04 +0100 Subject: [PATCH 195/465] [DSE] Move legacy tests to DeadStoreElimination/MemDepAnalysis. This patch moves the tests for the old MemDepAnalysis based DSE implementation to the MemDepAnalysis subdirectory and updates them to pass -enable-dse-memoryssa=false. This is in preparation for the switch to MemorySSA-backed DSE. --- .../{ => MemDepAnalysis}/2011-03-25-DSEMiscompile.ll | 2 +- .../{ => MemDepAnalysis}/2011-09-06-EndOfFunction.ll | 2 +- .../{ => MemDepAnalysis}/2011-09-06-MemCpy.ll | 2 +- .../{ => MemDepAnalysis}/2016-07-17-UseAfterFree.ll | 2 +- .../{ => MemDepAnalysis}/DeleteThrowableInst.ll | 2 +- .../{ => MemDepAnalysis}/OverwriteStoreBegin.ll | 2 +- .../{ => MemDepAnalysis}/OverwriteStoreEnd.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/PartialStore.ll | 2 +- .../{ => MemDepAnalysis}/PartialStore2.ll | 4 ++-- .../{ => MemDepAnalysis}/X86/gather-null-pointer.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/atomic.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/calloc-store.ll | 2 +- .../{ => MemDepAnalysis}/combined-partial-overwrites.ll | 2 +- .../{ => MemDepAnalysis}/const-pointers.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/crash.ll | 2 +- .../{ => MemDepAnalysis}/cs-cs-aliasing.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/debuginfo.ll | 2 +- .../DeadStoreElimination/{ => 
MemDepAnalysis}/dominate.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/fence.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/free.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/inst-limits.ll | 2 +- .../{ => MemDepAnalysis}/int_sideeffect.ll | 2 +- .../{ => MemDepAnalysis}/invariant.start.ll | 2 +- .../{ => MemDepAnalysis}/launder.invariant.group.ll | 4 ++-- .../DeadStoreElimination/{ => MemDepAnalysis}/libcalls.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/libcalls2.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/lifetime.ll | 2 +- .../{ => MemDepAnalysis}/mda-with-dbg-values.ll | 4 ++-- .../{ => MemDepAnalysis}/memintrinsics.ll | 2 +- .../{ => MemDepAnalysis}/memset-missing-debugloc.ll | 2 +- .../{ => MemDepAnalysis}/merge-stores-big-endian.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/merge-stores.ll | 2 +- .../{ => MemDepAnalysis}/no-targetdata.ll | 2 +- .../{ => MemDepAnalysis}/operand-bundles.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/pr11390.ll | 2 +- .../DeadStoreElimination/{ => MemDepAnalysis}/simple.ll | 4 ++-- .../DeadStoreElimination/{ => MemDepAnalysis}/tail-byval.ll | 2 +- 37 files changed, 41 insertions(+), 41 deletions(-) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/2011-03-25-DSEMiscompile.ll (92%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/2011-09-06-EndOfFunction.ll (93%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/2011-09-06-MemCpy.ll (98%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/2016-07-17-UseAfterFree.ll (89%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/DeleteThrowableInst.ll (93%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/OverwriteStoreBegin.ll (99%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/OverwriteStoreEnd.ll (99%) rename llvm/test/Transforms/DeadStoreElimination/{ => 
MemDepAnalysis}/PartialStore.ll (95%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/PartialStore2.ll (83%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/X86/gather-null-pointer.ll (93%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/atomic.ll (97%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/calloc-store.ll (95%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/combined-partial-overwrites.ll (98%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/const-pointers.ll (91%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/crash.ll (98%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/cs-cs-aliasing.ll (97%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/debuginfo.ll (92%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/dominate.ll (87%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/fence.ll (97%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/free.ll (94%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/inst-limits.ll (99%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/int_sideeffect.ll (80%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/invariant.start.ll (93%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/launder.invariant.group.ll (93%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/libcalls.ll (96%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/libcalls2.ll (82%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/lifetime.ll (92%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/mda-with-dbg-values.ll (94%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/memintrinsics.ll (97%) rename 
llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/memset-missing-debugloc.ll (98%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/merge-stores-big-endian.ll (98%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/merge-stores.ll (98%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/no-targetdata.ll (89%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/operand-bundles.ll (94%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/pr11390.ll (95%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/simple.ll (99%) rename llvm/test/Transforms/DeadStoreElimination/{ => MemDepAnalysis}/tail-byval.ll (91%) diff --git a/llvm/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-03-25-DSEMiscompile.ll similarity index 92% rename from llvm/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-03-25-DSEMiscompile.ll index 25c2d5ffe7f56..826732c07af91 100644 --- a/llvm/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-03-25-DSEMiscompile.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s ; PR9561 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin9.8" diff --git a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-EndOfFunction.ll similarity index 93% rename from llvm/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll rename to 
llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-EndOfFunction.ll index 7e46d28a9c47f..3f77349879170 100644 --- a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-EndOfFunction.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin" diff --git a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-MemCpy.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-MemCpy.ll index 665d772d03b91..6837a3dd852f8 100644 --- a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2011-09-06-MemCpy.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2016-07-17-UseAfterFree.ll similarity index 89% rename from llvm/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2016-07-17-UseAfterFree.ll index 3501b43600168..7b74dde7d6b00 100644 --- a/llvm/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll +++ 
b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/2016-07-17-UseAfterFree.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S -enable-dse-partial-overwrite-tracking | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S -enable-dse-partial-overwrite-tracking | FileCheck %s ; PR28588 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/DeleteThrowableInst.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/DeleteThrowableInst.ll similarity index 93% rename from llvm/test/Transforms/DeadStoreElimination/DeleteThrowableInst.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/DeleteThrowableInst.ll index 5bb29529c2665..7e8c9ca3bd8a7 100644 --- a/llvm/test/Transforms/DeadStoreElimination/DeleteThrowableInst.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/DeleteThrowableInst.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s declare i8* @_Znwj(i32) local_unnamed_addr declare void @foo() readnone diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreBegin.ll similarity index 99% rename from llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreBegin.ll index b5d9c40cbdbc3..56334be182918 100644 --- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreBegin.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s define void 
@write4to7(i32* nocapture %p) { ; CHECK-LABEL: @write4to7( diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreEnd.ll similarity index 99% rename from llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreEnd.ll index b6ae657d17e5e..a8f09e3e3b44a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/OverwriteStoreEnd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" %struct.vec2 = type { <4 x i32>, <4 x i32> } diff --git a/llvm/test/Transforms/DeadStoreElimination/PartialStore.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore.ll similarity index 95% rename from llvm/test/Transforms/DeadStoreElimination/PartialStore.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore.ll index 1dd894e6658cc..bb6d024701bac 100644 --- a/llvm/test/Transforms/DeadStoreElimination/PartialStore.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -enable-dse-partial-store-merging=false -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=false -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Ensure that the dead store is deleted in this case. 
It is wholely diff --git a/llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore2.ll similarity index 83% rename from llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore2.ll index ebcb0c3808a15..4fb271b134585 100644 --- a/llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/PartialStore2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s --data-layout "e" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s -; RUN: opt < %s --data-layout "E" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s +; RUN: opt < %s --data-layout "e" -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s +; RUN: opt < %s --data-layout "E" -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s ; This test used to hit an assertion (see PR41949). 
; diff --git a/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/X86/gather-null-pointer.ll similarity index 93% rename from llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/X86/gather-null-pointer.ll index 6a5f4bb9eb25c..ee56a6df6aab8 100644 --- a/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/X86/gather-null-pointer.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -dse -S | FileCheck %s +; RUN: opt < %s -dse -enable-dse-memoryssa=false -S | FileCheck %s ; Both stores should be emitted because we can't tell if the gather aliases. diff --git a/llvm/test/Transforms/DeadStoreElimination/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/atomic.ll similarity index 97% rename from llvm/test/Transforms/DeadStoreElimination/atomic.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/atomic.ll index 29850b7ac27f8..ec1917f82b817 100644 --- a/llvm/test/Transforms/DeadStoreElimination/atomic.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/atomic.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.7.0" diff --git a/llvm/test/Transforms/DeadStoreElimination/calloc-store.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/calloc-store.ll similarity index 95% rename from llvm/test/Transforms/DeadStoreElimination/calloc-store.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/calloc-store.ll index 
5bd384b033fbc..cfc73f45853d0 100644 --- a/llvm/test/Transforms/DeadStoreElimination/calloc-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/calloc-store.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s declare noalias i8* @calloc(i64, i64) diff --git a/llvm/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/combined-partial-overwrites.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/combined-partial-overwrites.ll index a57693a1da38e..0e98e966ce1db 100644 --- a/llvm/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/combined-partial-overwrites.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging=false < %s | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux" diff --git a/llvm/test/Transforms/DeadStoreElimination/const-pointers.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/const-pointers.ll similarity index 91% rename from llvm/test/Transforms/DeadStoreElimination/const-pointers.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/const-pointers.ll index a2218b725cd3b..16be53c7a23b3 100644 --- a/llvm/test/Transforms/DeadStoreElimination/const-pointers.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/const-pointers.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %t = type { i32 } diff --git 
a/llvm/test/Transforms/DeadStoreElimination/crash.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/crash.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/crash.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/crash.ll index ccee7fb8ba58b..d211ff97dac7a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/crash.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin10.0" diff --git a/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/cs-cs-aliasing.ll similarity index 97% rename from llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/cs-cs-aliasing.ll index b403e3382234d..9df0bad79929a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/cs-cs-aliasing.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/debuginfo.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/debuginfo.ll similarity index 92% rename from llvm/test/Transforms/DeadStoreElimination/debuginfo.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/debuginfo.ll index b927965dc4054..fbca29decdece 100644 --- a/llvm/test/Transforms/DeadStoreElimination/debuginfo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/debuginfo.ll 
@@ -1,4 +1,4 @@ -; RUN: opt < %s -debugify -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -debugify -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/dominate.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/dominate.ll similarity index 87% rename from llvm/test/Transforms/DeadStoreElimination/dominate.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/dominate.ll index 24dd65e07bbc2..63f1a3ffc8ff5 100644 --- a/llvm/test/Transforms/DeadStoreElimination/dominate.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/dominate.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -disable-output < %s +; RUN: opt -dse -enable-dse-memoryssa=false -disable-output < %s ; test that we don't crash declare void @bar() diff --git a/llvm/test/Transforms/DeadStoreElimination/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/fence.ll similarity index 97% rename from llvm/test/Transforms/DeadStoreElimination/fence.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/fence.ll index 0ef29b0bd7222..ce2dcd3236b74 100644 --- a/llvm/test/Transforms/DeadStoreElimination/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/fence.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s ; We conservative choose to prevent dead store elimination ; across release or stronger fences. 
It's not required diff --git a/llvm/test/Transforms/DeadStoreElimination/free.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/free.ll similarity index 94% rename from llvm/test/Transforms/DeadStoreElimination/free.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/free.ll index 66ccc7b4f47b5..275b7e372f111 100644 --- a/llvm/test/Transforms/DeadStoreElimination/free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/free.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s target datalayout = "e-p:64:64:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/inst-limits.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/inst-limits.ll similarity index 99% rename from llvm/test/Transforms/DeadStoreElimination/inst-limits.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/inst-limits.ll index e9e46df402e9d..6e5f4c150459d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/inst-limits.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/inst-limits.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dse < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa=false < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; If there are two stores to the same location, DSE should be able to remove diff --git a/llvm/test/Transforms/DeadStoreElimination/int_sideeffect.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/int_sideeffect.ll similarity index 80% rename from llvm/test/Transforms/DeadStoreElimination/int_sideeffect.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/int_sideeffect.ll index 035e787f6bd7a..806c7362267d1 100644 --- a/llvm/test/Transforms/DeadStoreElimination/int_sideeffect.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/int_sideeffect.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -dse | FileCheck %s +; RUN: opt 
-S < %s -dse -enable-dse-memoryssa=false | FileCheck %s declare void @llvm.sideeffect() diff --git a/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/invariant.start.ll similarity index 93% rename from llvm/test/Transforms/DeadStoreElimination/invariant.start.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/invariant.start.ll index 27400cd4ed16c..5a0c1f05a2d37 100644 --- a/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/invariant.start.ll @@ -1,5 +1,5 @@ ; Test to make sure llvm.invariant.start calls are not treated as clobbers. -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly diff --git a/llvm/test/Transforms/DeadStoreElimination/launder.invariant.group.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/launder.invariant.group.ll similarity index 93% rename from llvm/test/Transforms/DeadStoreElimination/launder.invariant.group.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/launder.invariant.group.ll index dbbc9451edd09..815ad02d4fc0f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/launder.invariant.group.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/launder.invariant.group.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s ; CHECK-LABEL: void @skipBarrier(i8* %ptr) define void @skipBarrier(i8* %ptr) { @@ -62,4 +62,4 @@ define void @skip4Barriers(i8* %ptr) { declare i8* @llvm.launder.invariant.group.p0i8(i8*) -declare i8* @llvm.strip.invariant.group.p0i8(i8*) \ No newline at end of file +declare i8* @llvm.strip.invariant.group.p0i8(i8*) diff --git 
a/llvm/test/Transforms/DeadStoreElimination/libcalls.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls.ll similarity index 96% rename from llvm/test/Transforms/DeadStoreElimination/libcalls.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls.ll index a18021d0c5c65..692a9a03ea824 100644 --- a/llvm/test/Transforms/DeadStoreElimination/libcalls.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/libcalls2.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls2.ll similarity index 82% rename from llvm/test/Transforms/DeadStoreElimination/libcalls2.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls2.ll index df3011314e9eb..23440f18b052a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/libcalls2.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/libcalls2.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/lifetime.ll similarity index 92% rename from llvm/test/Transforms/DeadStoreElimination/lifetime.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/lifetime.ll index e9999ff0740f3..3f20d9e1dda2d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/lifetime.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/lifetime.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -basic-aa -dse < %s | FileCheck %s +; RUN: opt -S -basic-aa -dse -enable-dse-memoryssa=false < %s | FileCheck %s target datalayout = 
"E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/mda-with-dbg-values.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/mda-with-dbg-values.ll similarity index 94% rename from llvm/test/Transforms/DeadStoreElimination/mda-with-dbg-values.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/mda-with-dbg-values.ll index 4c6776b3758ef..b46e1392723cf 100644 --- a/llvm/test/Transforms/DeadStoreElimination/mda-with-dbg-values.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/mda-with-dbg-values.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -dse -memdep-block-scan-limit=3 < %s | FileCheck %s -; RUN: opt -S -strip-debug -dse -memdep-block-scan-limit=3 < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa=false -memdep-block-scan-limit=3 < %s | FileCheck %s +; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa=false -memdep-block-scan-limit=3 < %s | FileCheck %s ; Test case to check that the memory dependency analysis gets the same ; result even if we have a dbg value between the memcpy and diff --git a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memintrinsics.ll similarity index 97% rename from llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memintrinsics.ll index 2902712376930..4a54d848b65c6 100644 --- a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memintrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -dse < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa=false < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind declare void @llvm.memmove.p0i8.p0i8.i8(i8* 
nocapture, i8* nocapture, i8, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memset-missing-debugloc.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/memset-missing-debugloc.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memset-missing-debugloc.ll index c0f490c0d6c25..ea07f91492393 100644 --- a/llvm/test/Transforms/DeadStoreElimination/memset-missing-debugloc.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/memset-missing-debugloc.ll @@ -1,7 +1,7 @@ ; Test that the getelementptr generated when the dse pass determines that ; a memset can be shortened has the debugloc carried over from the memset. -; RUN: opt -S -march=native -dse < %s| FileCheck %s +; RUN: opt -S -march=native -dse -enable-dse-memoryssa=false < %s| FileCheck %s ; CHECK: bitcast [5 x i64]* %{{[a-zA-Z_][a-zA-Z0-9_]*}} to i8*, !dbg ; CHECK-NEXT: %{{[0-9]+}} = getelementptr inbounds i8, i8* %0, i64 32, !dbg ![[DBG:[0-9]+]] ; CHECK: ![[DBG]] = !DILocation(line: 2, diff --git a/llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores-big-endian.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores-big-endian.ll index 77784ac0c4047..1d4f7b38b4c02 100644 --- a/llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores-big-endian.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging -S < %s | FileCheck %s target datalayout = 
"E-m:e-i64:64-i128:128-n32:64-S128" define void @byte_by_byte_replacement(i32 *%ptr) { diff --git a/llvm/test/Transforms/DeadStoreElimination/merge-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores.ll similarity index 98% rename from llvm/test/Transforms/DeadStoreElimination/merge-stores.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores.ll index 095ad9d8530c8..b3e16f8e5a8ec 100644 --- a/llvm/test/Transforms/DeadStoreElimination/merge-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/merge-stores.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa=false -enable-dse-partial-store-merging -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" define void @byte_by_byte_replacement(i32 *%ptr) { diff --git a/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/no-targetdata.ll similarity index 89% rename from llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/no-targetdata.ll index aec3076678787..ae32017c6a459 100644 --- a/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/no-targetdata.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/operand-bundles.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/operand-bundles.ll similarity index 
94% rename from llvm/test/Transforms/DeadStoreElimination/operand-bundles.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/operand-bundles.ll index f3df74be031b7..18117b3def8df 100644 --- a/llvm/test/Transforms/DeadStoreElimination/operand-bundles.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/operand-bundles.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s declare noalias i8* @malloc(i64) "malloc-like" diff --git a/llvm/test/Transforms/DeadStoreElimination/pr11390.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/pr11390.ll similarity index 95% rename from llvm/test/Transforms/DeadStoreElimination/pr11390.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/pr11390.ll index 56ca604eff98b..ac806d6f170a0 100644 --- a/llvm/test/Transforms/DeadStoreElimination/pr11390.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/pr11390.ll @@ -1,4 +1,4 @@ -; RUN: opt -basic-aa -dse -S < %s | FileCheck %s +; RUN: opt -basic-aa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s ; PR11390 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/simple.ll similarity index 99% rename from llvm/test/Transforms/DeadStoreElimination/simple.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/simple.ll index 871190c584120..78f8a7c245254 100644 --- a/llvm/test/Transforms/DeadStoreElimination/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/simple.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -dse -S | FileCheck %s -; 
RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s +; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa=false -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa=false -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/tail-byval.ll b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/tail-byval.ll similarity index 91% rename from llvm/test/Transforms/DeadStoreElimination/tail-byval.ll rename to llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/tail-byval.ll index ed2fbd434a75d..0b4a76fee55cd 100644 --- a/llvm/test/Transforms/DeadStoreElimination/tail-byval.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MemDepAnalysis/tail-byval.ll @@ -1,4 +1,4 @@ -; RUN: opt -dse -S < %s | FileCheck %s +; RUN: opt -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s ; Don't eliminate stores to allocas before tail calls to functions that use ; byval. It's correct to mark calls like these as 'tail'. To implement this tail From 7b0332389afd705f46b02fcf87ec3414b8dece34 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 4 Sep 2020 10:13:28 -0400 Subject: [PATCH 196/465] Revert "Canonicalize declaration pointers when forming APValues." This reverts commit e6393ee813178e9d3306b8e3c6949a4f32f8a2cb. 
It breaks Wunreachable for weak attributes, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20200831/336645.html --- clang/include/clang/AST/APValue.h | 4 +-- clang/lib/AST/APValue.cpp | 26 ++++++------------- clang/lib/AST/ExprConstant.cpp | 18 ++++++++----- .../CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp | 3 ++- clang/test/OpenMP/ordered_messages.cpp | 5 +--- 5 files changed, 25 insertions(+), 31 deletions(-) diff --git a/clang/include/clang/AST/APValue.h b/clang/include/clang/AST/APValue.h index 485e6c2602cff..87e4bd7f84c11 100644 --- a/clang/include/clang/AST/APValue.h +++ b/clang/include/clang/AST/APValue.h @@ -174,7 +174,6 @@ class APValue { return !(LHS == RHS); } friend llvm::hash_code hash_value(const LValueBase &Base); - friend struct llvm::DenseMapInfo; private: PtrTy Ptr; @@ -202,7 +201,8 @@ class APValue { public: LValuePathEntry() : Value() {} - LValuePathEntry(BaseOrMemberType BaseOrMember); + LValuePathEntry(BaseOrMemberType BaseOrMember) + : Value{reinterpret_cast(BaseOrMember.getOpaqueValue())} {} static LValuePathEntry ArrayIndex(uint64_t Index) { LValuePathEntry Result; Result.Value = Index; diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 7531229654cf4..2a8834b4db0cb 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -38,7 +38,7 @@ static_assert( "Type is insufficiently aligned"); APValue::LValueBase::LValueBase(const ValueDecl *P, unsigned I, unsigned V) - : Ptr(P ? 
cast(P->getCanonicalDecl()) : nullptr), Local{I, V} {} + : Ptr(P), Local{I, V} {} APValue::LValueBase::LValueBase(const Expr *P, unsigned I, unsigned V) : Ptr(P), Local{I, V} {} @@ -82,19 +82,13 @@ bool operator==(const APValue::LValueBase &LHS, const APValue::LValueBase &RHS) { if (LHS.Ptr != RHS.Ptr) return false; - if (LHS.is() || LHS.is()) + if (LHS.is()) return true; return LHS.Local.CallIndex == RHS.Local.CallIndex && LHS.Local.Version == RHS.Local.Version; } } -APValue::LValuePathEntry::LValuePathEntry(BaseOrMemberType BaseOrMember) { - if (const Decl *D = BaseOrMember.getPointer()) - BaseOrMember.setPointer(D->getCanonicalDecl()); - Value = reinterpret_cast(BaseOrMember.getOpaqueValue()); -} - namespace { struct LVBase { APValue::LValueBase Base; @@ -119,16 +113,14 @@ APValue::LValueBase::operator bool () const { clang::APValue::LValueBase llvm::DenseMapInfo::getEmptyKey() { - clang::APValue::LValueBase B; - B.Ptr = DenseMapInfo::getEmptyKey(); - return B; + return clang::APValue::LValueBase( + DenseMapInfo::getEmptyKey()); } clang::APValue::LValueBase llvm::DenseMapInfo::getTombstoneKey() { - clang::APValue::LValueBase B; - B.Ptr = DenseMapInfo::getTombstoneKey(); - return B; + return clang::APValue::LValueBase( + DenseMapInfo::getTombstoneKey()); } namespace clang { @@ -765,10 +757,8 @@ void APValue::MakeMemberPointer(const ValueDecl *Member, bool IsDerivedMember, assert(isAbsent() && "Bad state change"); MemberPointerData *MPD = new ((void*)(char*)Data.buffer) MemberPointerData; Kind = MemberPointer; - MPD->MemberAndIsDerivedMember.setPointer( - Member ? 
cast(Member->getCanonicalDecl()) : nullptr); + MPD->MemberAndIsDerivedMember.setPointer(Member); MPD->MemberAndIsDerivedMember.setInt(IsDerivedMember); MPD->resizePath(Path.size()); - for (unsigned I = 0; I != Path.size(); ++I) - MPD->getPath()[I] = Path[I]->getCanonicalDecl(); + memcpy(MPD->getPath(), Path.data(), Path.size()*sizeof(const CXXRecordDecl*)); } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 8e43b62662eef..e8f132dd48032 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1978,11 +1978,18 @@ static bool HasSameBase(const LValue &A, const LValue &B) { return false; if (A.getLValueBase().getOpaqueValue() != - B.getLValueBase().getOpaqueValue()) - return false; + B.getLValueBase().getOpaqueValue()) { + const Decl *ADecl = GetLValueBaseDecl(A); + if (!ADecl) + return false; + const Decl *BDecl = GetLValueBaseDecl(B); + if (!BDecl || ADecl->getCanonicalDecl() != BDecl->getCanonicalDecl()) + return false; + } - return A.getLValueCallIndex() == B.getLValueCallIndex() && - A.getLValueVersion() == B.getLValueVersion(); + return IsGlobalLValue(A.getLValueBase()) || + (A.getLValueCallIndex() == B.getLValueCallIndex() && + A.getLValueVersion() == B.getLValueVersion()); } static void NoteLValueLocation(EvalInfo &Info, APValue::LValueBase Base) { @@ -3101,8 +3108,7 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E, // If we're currently evaluating the initializer of this declaration, use that // in-flight value. 
- if (declaresSameEntity(Info.EvaluatingDecl.dyn_cast(), - VD)) { + if (Info.EvaluatingDecl.dyn_cast() == VD) { Result = Info.EvaluatingDeclValue; return true; } diff --git a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp index 3720b277af7a9..8d51dbde71776 100644 --- a/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.spec/dcl.constexpr/p9.cpp @@ -24,10 +24,11 @@ constexpr double &ni3; // expected-error {{declaration of reference variable 'ni constexpr int nc1 = i; // expected-error {{constexpr variable 'nc1' must be initialized by a constant expression}} expected-note {{read of non-const variable 'i' is not allowed in a constant expression}} constexpr C nc2 = C(); // expected-error {{cannot have non-literal type 'const C'}} -int &f(); // expected-note 2{{declared here}} +int &f(); // expected-note {{declared here}} constexpr int &nc3 = f(); // expected-error {{constexpr variable 'nc3' must be initialized by a constant expression}} expected-note {{non-constexpr function 'f' cannot be used in a constant expression}} constexpr int nc4(i); // expected-error {{constexpr variable 'nc4' must be initialized by a constant expression}} expected-note {{read of non-const variable 'i' is not allowed in a constant expression}} constexpr C nc5((C())); // expected-error {{cannot have non-literal type 'const C'}} +int &f(); // expected-note {{here}} constexpr int &nc6(f()); // expected-error {{constexpr variable 'nc6' must be initialized by a constant expression}} expected-note {{non-constexpr function 'f'}} struct pixel { diff --git a/clang/test/OpenMP/ordered_messages.cpp b/clang/test/OpenMP/ordered_messages.cpp index 8a3a86443eb8c..f6b9dbd6d27fa 100644 --- a/clang/test/OpenMP/ordered_messages.cpp +++ b/clang/test/OpenMP/ordered_messages.cpp @@ -16,9 +16,6 @@ void xxx(int argc) { } int foo(); -#if __cplusplus >= 201103L -// expected-note@-2 {{declared here}} -#endif template T foo() 
{ @@ -179,7 +176,7 @@ T foo() { int foo() { #if __cplusplus >= 201103L -// expected-note@-2 {{declared here}} +// expected-note@-2 2 {{declared here}} #endif int k; #pragma omp for ordered From c88a77620436ee475d54d3b5ced30286101e0dc9 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 4 Sep 2020 10:23:16 -0400 Subject: [PATCH 197/465] [gn build] fix llvm-lit after d23b15cc16077 --- llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn index 648646e4188d6..16669c4219c93 100644 --- a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn @@ -86,7 +86,7 @@ write_cmake_config("llvm-lit") { values = [ "LLVM_SOURCE_DIR=" + rebase_path("//llvm", dir), - "Python3_EXECUTABLE=$python_path", + "Python3_EXECUTABLE=/usr/bin/env $python_path", "BUILD_MODE=.", "LLVM_LIT_CONFIG_MAP=" + config_map, ] From 7d53fecb679228025ea0b1a69209fdcb85b2ae47 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 3 Sep 2020 20:55:20 -0400 Subject: [PATCH 198/465] [spirv] Add more target and resource limit fields These fields will be used to choose/influence patterns for SPIR-V code generation. 
Reviewed By: mravishankar Differential Revision: https://reviews.llvm.org/D87106 --- mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td | 18 +++++++++++++++ .../include/mlir/Dialect/SPIRV/TargetAndABI.h | 2 ++ .../mlir/Dialect/SPIRV/TargetAndABI.td | 23 ++++++++++++++++++- mlir/lib/Dialect/SPIRV/TargetAndABI.cpp | 23 ++++++++++++++----- 4 files changed, 59 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td index 6458183bdeb2d..21f926a1500c5 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td @@ -247,6 +247,24 @@ def QueryCapabilityInterface : OpInterface<"QueryCapabilityInterface"> { "getCapabilities">]; } +//===----------------------------------------------------------------------===// +// SPIR-V target GPU vendor and device definitions +//===----------------------------------------------------------------------===// + +// An accelerator other than GPU or CPU +def SPV_DT_Other : I32EnumAttrCase<"Other", 0>; +def SPV_DT_IntegratedGPU : I32EnumAttrCase<"IntegratedGPU", 1>; +def SPV_DT_DiscreteGPU : I32EnumAttrCase<"DiscreteGPU", 2>; +def SPV_DT_CPU : I32EnumAttrCase<"CPU", 3>; +// Information missing. 
+def SPV_DT_Unknown : I32EnumAttrCase<"Unknown", 0x7FFFFFFF>; + +def SPV_DeviceTypeAttr : SPV_I32EnumAttr< + "DeviceType", "valid SPIR-V device types", [ + SPV_DT_Other, SPV_DT_IntegratedGPU, SPV_DT_DiscreteGPU, + SPV_DT_CPU, SPV_DT_Unknown + ]>; + //===----------------------------------------------------------------------===// // SPIR-V extension definitions //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h index e276123c4bb54..5ef999d1b9fe1 100644 --- a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h +++ b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h @@ -29,6 +29,8 @@ class TargetEnv { public: explicit TargetEnv(TargetEnvAttr targetAttr); + DeviceType getDeviceType(); + Version getVersion(); /// Returns true if the given capability is allowed. diff --git a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td index 04fcc8e0b53ed..af4da692c5de5 100644 --- a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td +++ b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.td @@ -45,10 +45,31 @@ def SPV_CapabilityArrayAttr : TypedArrayAttrBase< // are the from Vulkan limit requirements: // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#limits-minmax def SPV_ResourceLimitsAttr : StructAttr<"ResourceLimitsAttr", SPIRV_Dialect, [ + // Unique identifier for the vendor and target GPU. + // 0x7FFFFFFF means unknown. + StructFieldAttr<"vendor_id", DefaultValuedAttr>, + StructFieldAttr<"device_id", DefaultValuedAttr>, + // Target device type. + StructFieldAttr<"device_type", + DefaultValuedAttr>, + + // The maximum total storage size, in bytes, available for variables + // declared with the Workgroup storage class. 
+ StructFieldAttr<"max_compute_shared_memory_size", + DefaultValuedAttr>, + + // The maximum total number of compute shader invocations in a single local + // workgroup. StructFieldAttr<"max_compute_workgroup_invocations", DefaultValuedAttr>, + // The maximum size of a local compute workgroup, per dimension. StructFieldAttr<"max_compute_workgroup_size", - DefaultValuedAttr> + DefaultValuedAttr>, + + // The default number of invocations in each subgroup. + // 0x7FFFFFFF means unknown. + StructFieldAttr<"subgroup_size", DefaultValuedAttr> ]>; #endif // SPIRV_TARGET_AND_ABI diff --git a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp index b5a82487188c0..8befc6db2935b 100644 --- a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp +++ b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp @@ -38,6 +38,14 @@ spirv::TargetEnv::TargetEnv(spirv::TargetEnvAttr targetAttr) } } +spirv::DeviceType spirv::TargetEnv::getDeviceType() { + auto deviceType = spirv::symbolizeDeviceType( + targetAttr.getResourceLimits().device_type().getInt()); + if (!deviceType) + return DeviceType::Unknown; + return *deviceType; +} + spirv::Version spirv::TargetEnv::getVersion() { return targetAttr.getVersion(); } @@ -134,13 +142,16 @@ DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) { spirv::ResourceLimitsAttr spirv::getDefaultResourceLimits(MLIRContext *context) { - auto i32Type = IntegerType::get(32, context); - auto v3i32Type = VectorType::get(3, i32Type); - - // These numbers are from "Table 46. Required Limits" of the Vulkan spec. + // All the fields have default values. Here we just provide a nicer way to + // construct a default resource limit attribute. 
return spirv::ResourceLimitsAttr ::get( - IntegerAttr::get(i32Type, 128), - DenseIntElementsAttr::get(v3i32Type, {128, 128, 64}), context); + /*vendor_id=*/nullptr, + /*device_id*/ nullptr, + /*device_type=*/nullptr, + /*max_compute_shared_memory_size=*/nullptr, + /*max_compute_workgroup_invocations=*/nullptr, + /*max_compute_workgroup_size=*/nullptr, + /*subgroup_size=*/nullptr, context); } StringRef spirv::getTargetEnvAttrName() { return "spv.target_env"; } From 2a03f270d69cf1079feb029f84727288e217588a Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 4 Sep 2020 10:26:46 -0400 Subject: [PATCH 199/465] clang: Add test for -Wunreachable-code + weak redeclaration This tests what caused the revert in 7b033238. --- clang/test/SemaCXX/unreachable-code.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/clang/test/SemaCXX/unreachable-code.cpp b/clang/test/SemaCXX/unreachable-code.cpp index fd006c099e7dc..0dfc3d5744fb3 100644 --- a/clang/test/SemaCXX/unreachable-code.cpp +++ b/clang/test/SemaCXX/unreachable-code.cpp @@ -68,3 +68,12 @@ int pr6130(unsigned i) { throw PR6130(); // no-warning } } + +extern "C" void foo(void); +extern "C" __attribute__((weak)) decltype(foo) foo; + +void weak_redecl() { + if (foo) + return; + bar(); // no-warning +} From 3404add4684d5959d1ed997149d9896d136d4d3c Mon Sep 17 00:00:00 2001 From: Bryan Chan Date: Sun, 30 Aug 2020 19:17:43 -0400 Subject: [PATCH 200/465] [EarlyCSE] Verify hash code in regression tests As discussed in D86843, -earlycse-debug-hash should be used in more regression tests to catch inconsistency between the hashing and the equivalence check. 
Differential Revision: https://reviews.llvm.org/D86863 --- llvm/test/Analysis/MemorySSA/pr41853.ll | 2 +- llvm/test/CodeGen/PowerPC/no-dup-of-bdnz.ll | 2 +- llvm/test/CodeGen/X86/mmx-bitcast-fold.ll | 2 +- llvm/test/DebugInfo/Generic/pr40628.ll | 2 +- llvm/test/DebugInfo/salvage-cast-debug-info.ll | 2 +- llvm/test/Feature/OperandBundles/early-cse.ll | 2 +- llvm/test/Feature/OperandBundles/special-state.ll | 2 +- llvm/test/Other/debugcounter-earlycse.ll | 2 +- llvm/test/Other/invariant.group.ll | 4 ++-- llvm/test/Other/opt-bisect-legacy-pass-manager.ll | 6 +++--- llvm/test/Other/print-debug-counter.ll | 2 +- llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll | 2 +- llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll | 2 +- llvm/test/Transforms/EarlyCSE/AMDGPU/intrinsics.ll | 2 +- llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll | 2 +- llvm/test/Transforms/EarlyCSE/PowerPC/read-reg.ll | 2 +- llvm/test/Transforms/EarlyCSE/X86/preserve_memoryssa.ll | 2 +- llvm/test/Transforms/EarlyCSE/and_or.ll | 2 +- llvm/test/Transforms/EarlyCSE/atomics.ll | 2 +- llvm/test/Transforms/EarlyCSE/basic.ll | 2 +- llvm/test/Transforms/EarlyCSE/conditional.ll | 2 +- llvm/test/Transforms/EarlyCSE/const-speculation.ll | 2 +- llvm/test/Transforms/EarlyCSE/debug-info-undef.ll | 2 +- llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll | 2 +- llvm/test/Transforms/EarlyCSE/edge.ll | 2 +- llvm/test/Transforms/EarlyCSE/fence.ll | 2 +- llvm/test/Transforms/EarlyCSE/flags.ll | 2 +- llvm/test/Transforms/EarlyCSE/floatingpoint.ll | 2 +- llvm/test/Transforms/EarlyCSE/gc_relocate.ll | 2 +- llvm/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll | 2 +- llvm/test/Transforms/EarlyCSE/guards.ll | 2 +- llvm/test/Transforms/EarlyCSE/instsimplify-dom.ll | 2 +- llvm/test/Transforms/EarlyCSE/int_sideeffect.ll | 2 +- llvm/test/Transforms/EarlyCSE/invariant-loads.ll | 2 +- llvm/test/Transforms/EarlyCSE/invariant.start.ll | 2 +- llvm/test/Transforms/EarlyCSE/memoryssa.ll | 2 +- llvm/test/Transforms/EarlyCSE/phi.ll | 2 +- 
llvm/test/Transforms/EarlyCSE/pr33406.ll | 2 +- llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll | 2 +- llvm/test/Transforms/EarlyCSE/writeonly.ll | 2 +- llvm/test/Transforms/GVNHoist/pr37445.ll | 2 +- llvm/test/Transforms/GVNHoist/pr38807.ll | 2 +- llvm/test/Transforms/Inline/inline-indirect-chain.ll | 2 +- llvm/test/Transforms/InstCombine/invariant.group.ll | 2 +- llvm/test/Transforms/InstSimplify/ConstProp/allones.ll | 2 +- llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll | 2 +- llvm/test/Transforms/InstSimplify/ConstProp/math-2.ll | 2 +- llvm/test/Transforms/InstSimplify/ConstProp/rint.ll | 2 +- llvm/test/Transforms/InstSimplify/ConstProp/round.ll | 2 +- llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll | 2 +- .../InstSimplify/ConstProp/vscale-getelementptr.ll | 2 +- .../InstSimplify/ConstProp/vscale-shufflevector.ll | 2 +- llvm/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll | 2 +- llvm/test/Transforms/Reassociate/cse-pairs.ll | 2 +- 54 files changed, 57 insertions(+), 57 deletions(-) diff --git a/llvm/test/Analysis/MemorySSA/pr41853.ll b/llvm/test/Analysis/MemorySSA/pr41853.ll index 6dbc9d7826214..f7bf21c9f90f2 100644 --- a/llvm/test/Analysis/MemorySSA/pr41853.ll +++ b/llvm/test/Analysis/MemorySSA/pr41853.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -memoryssa -loop-simplify -early-cse-memssa -verify-memoryssa %s | FileCheck %s +; RUN: opt -S -memoryssa -loop-simplify -early-cse-memssa -earlycse-debug-hash -verify-memoryssa %s | FileCheck %s ; RUN: opt -S -memoryssa -loop-simplify -early-cse-memssa -enable-mssa-loop-dependency -verify-memoryssa %s | FileCheck %s ; REQUIRES: asserts target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/PowerPC/no-dup-of-bdnz.ll b/llvm/test/CodeGen/PowerPC/no-dup-of-bdnz.ll index 9f5ae661b5d76..82836f42e1c19 100644 --- a/llvm/test/CodeGen/PowerPC/no-dup-of-bdnz.ll +++ b/llvm/test/CodeGen/PowerPC/no-dup-of-bdnz.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse-memssa -loop-rotate -licm -loop-rotate -S %s -o - | 
FileCheck %s +; RUN: opt -early-cse-memssa -earlycse-debug-hash -loop-rotate -licm -loop-rotate -S %s -o - | FileCheck %s ; ModuleID = 'bugpoint-reduced-simplified.bc' source_filename = "bugpoint-output-8903f29.bc" target datalayout = "e-m:e-i64:64-n32:64" diff --git a/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll b/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll index fc7ce73a441e8..7d1987c1f6a74 100644 --- a/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll +++ b/llvm/test/CodeGen/X86/mmx-bitcast-fold.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=x86_64-- -early-cse < %s -S | FileCheck %s +; RUN: opt -mtriple=x86_64-- -early-cse -earlycse-debug-hash < %s -S | FileCheck %s ; CHECK: @foo(x86_mmx bitcast (double 0.000000e+00 to x86_mmx)) diff --git a/llvm/test/DebugInfo/Generic/pr40628.ll b/llvm/test/DebugInfo/Generic/pr40628.ll index 69f09f7eb5bf9..af994de725066 100644 --- a/llvm/test/DebugInfo/Generic/pr40628.ll +++ b/llvm/test/DebugInfo/Generic/pr40628.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S %s -o - | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S %s -o - | FileCheck %s ; PR40628: The first load below is determined to be redundant by EarlyCSE. 
; During salvaging, the corresponding dbg.value could have a DW_OP_deref used diff --git a/llvm/test/DebugInfo/salvage-cast-debug-info.ll b/llvm/test/DebugInfo/salvage-cast-debug-info.ll index f39ac46a66a4b..c4f2111bddf22 100644 --- a/llvm/test/DebugInfo/salvage-cast-debug-info.ll +++ b/llvm/test/DebugInfo/salvage-cast-debug-info.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -debugify -early-cse -S | FileCheck %s +; RUN: opt %s -debugify -early-cse -earlycse-debug-hash -S | FileCheck %s define i32 @foo(i64 %nose, i32 %more) { ; CHECK-LABEL: @foo( ; CHECK: call void @llvm.dbg.value(metadata i64 %nose, metadata [[V1:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned diff --git a/llvm/test/Feature/OperandBundles/early-cse.ll b/llvm/test/Feature/OperandBundles/early-cse.ll index fc201479d8ce8..cf06cd1e1f1db 100644 --- a/llvm/test/Feature/OperandBundles/early-cse.ll +++ b/llvm/test/Feature/OperandBundles/early-cse.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; While it is normally okay to do memory optimizations over calls to ; @readonly_function and @readnone_function, we cannot do that if diff --git a/llvm/test/Feature/OperandBundles/special-state.ll b/llvm/test/Feature/OperandBundles/special-state.ll index 56e337cc16b37..f51becaa50984 100644 --- a/llvm/test/Feature/OperandBundles/special-state.ll +++ b/llvm/test/Feature/OperandBundles/special-state.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; This test isn't directly related to EarlyCSE or varargs. 
It is just ; using these as a vehicle for testing the correctness of diff --git a/llvm/test/Other/debugcounter-earlycse.ll b/llvm/test/Other/debugcounter-earlycse.ll index 3d0a9cdbd3a23..daaf7c9e4a75f 100644 --- a/llvm/test/Other/debugcounter-earlycse.ll +++ b/llvm/test/Other/debugcounter-earlycse.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -S -debug-counter=early-cse-skip=1,early-cse-count=1 -early-cse < %s 2>&1 | FileCheck %s +; RUN: opt -S -debug-counter=early-cse-skip=1,early-cse-count=1 -early-cse -earlycse-debug-hash < %s 2>&1 | FileCheck %s ;; Test that, with debug counters on, we only optimize the second CSE opportunity. define i32 @test(i32 %a, i32 %b) { ; CHECK-LABEL: @test( diff --git a/llvm/test/Other/invariant.group.ll b/llvm/test/Other/invariant.group.ll index 3b2cacedbd947..36f1e33688e42 100644 --- a/llvm/test/Other/invariant.group.ll +++ b/llvm/test/Other/invariant.group.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; RUN: opt -S -gvn < %s | FileCheck %s ; RUN: opt -S -newgvn < %s | FileCheck %s ; RUN: opt -S -O3 < %s | FileCheck %s @@ -101,4 +101,4 @@ declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* @llvm.strip.invariant.group.p0i8(i8*) -!0 = !{} \ No newline at end of file +!0 = !{} diff --git a/llvm/test/Other/opt-bisect-legacy-pass-manager.ll b/llvm/test/Other/opt-bisect-legacy-pass-manager.ll index bf89e80d49604..297f61230c9dd 100644 --- a/llvm/test/Other/opt-bisect-legacy-pass-manager.ll +++ b/llvm/test/Other/opt-bisect-legacy-pass-manager.ll @@ -83,14 +83,14 @@ ; Test a function pass. 
-; RUN: opt -disable-output -disable-verify -early-cse -opt-bisect-limit=-1 \ +; RUN: opt -disable-output -disable-verify -early-cse -earlycse-debug-hash -opt-bisect-limit=-1 \ ; RUN: %s 2>&1 | FileCheck %s --check-prefix=CHECK-EARLY-CSE ; CHECK-EARLY-CSE: BISECT: running pass ({{[0-9]+}}) Early CSE on function (f1) ; CHECK-EARLY-CSE: BISECT: running pass ({{[0-9]+}}) Early CSE on function (f2) ; CHECK-EARLY-CSE: BISECT: running pass ({{[0-9]+}}) Early CSE on function (f3) -; RUN: opt -disable-output -disable-verify -early-cse -opt-bisect-limit=0 %s \ -; RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-EARLY-CSE +; RUN: opt -disable-output -disable-verify -early-cse -earlycse-debug-hash -opt-bisect-limit=0 \ +; RUN: %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-EARLY-CSE ; CHECK-NOT-EARLY-CSE: BISECT: NOT running pass ({{[0-9]+}}) Early CSE on function (f1) ; CHECK-NOT-EARLY-CSE: BISECT: NOT running pass ({{[0-9]+}}) Early CSE on function (f2) ; CHECK-NOT-EARLY-CSE: BISECT: NOT running pass ({{[0-9]+}}) Early CSE on function (f3) diff --git a/llvm/test/Other/print-debug-counter.ll b/llvm/test/Other/print-debug-counter.ll index 3647f39026dcf..846817add4b84 100644 --- a/llvm/test/Other/print-debug-counter.ll +++ b/llvm/test/Other/print-debug-counter.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -S -debug-counter=early-cse-skip=1,early-cse-count=1 -early-cse \ +; RUN: opt -S -debug-counter=early-cse-skip=1,early-cse-count=1 -early-cse -earlycse-debug-hash \ ; RUN: -debug-counter=newgvn-vn-skip=1,newgvn-vn-count=2 -newgvn \ ; RUN: -instcombine -print-debug-counter < %s 2>&1 | FileCheck %s ;; Test debug counter prints correct info in right order. 
diff --git a/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll b/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll index 698d5d1e61c3e..18bc6c5922237 100644 --- a/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll +++ b/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -early-cse | FileCheck %s +; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -basic-aa -early-cse-memssa | FileCheck %s ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -passes=early-cse | FileCheck %s ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes=early-cse-memssa | FileCheck %s diff --git a/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll b/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll index 1a11fa17dd42d..16a5f07684c9a 100644 --- a/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll +++ b/llvm/test/Transforms/EarlyCSE/AArch64/ldstN.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; RUN: opt -S -basic-aa -early-cse-memssa < %s | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" diff --git a/llvm/test/Transforms/EarlyCSE/AMDGPU/intrinsics.ll b/llvm/test/Transforms/EarlyCSE/AMDGPU/intrinsics.ll index 0fae469341d04..9333edab051b0 100644 --- a/llvm/test/Transforms/EarlyCSE/AMDGPU/intrinsics.ll +++ b/llvm/test/Transforms/EarlyCSE/AMDGPU/intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -mtriple=amdgcn-- -early-cse | FileCheck %s +; RUN: opt < %s -S -mtriple=amdgcn-- -early-cse -earlycse-debug-hash | FileCheck %s ; CHECK-LABEL: @no_cse ; CHECK: call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %in, i32 0, i32 0) diff --git a/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll 
b/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll index 6b42ee8d71efc..8e618b5cfab00 100644 --- a/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll +++ b/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -early-cse-memssa < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -early-cse-memssa -earlycse-debug-hash < %s | FileCheck %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" ; CHECK-LABEL: @memrealtime( diff --git a/llvm/test/Transforms/EarlyCSE/PowerPC/read-reg.ll b/llvm/test/Transforms/EarlyCSE/PowerPC/read-reg.ll index 95dd9d515cd93..9beb3b47c6a18 100644 --- a/llvm/test/Transforms/EarlyCSE/PowerPC/read-reg.ll +++ b/llvm/test/Transforms/EarlyCSE/PowerPC/read-reg.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; RUN: opt -S -basic-aa -early-cse-memssa < %s | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/EarlyCSE/X86/preserve_memoryssa.ll b/llvm/test/Transforms/EarlyCSE/X86/preserve_memoryssa.ll index 946293df6ca16..ac9e80d9c6e45 100644 --- a/llvm/test/Transforms/EarlyCSE/X86/preserve_memoryssa.ll +++ b/llvm/test/Transforms/EarlyCSE/X86/preserve_memoryssa.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -early-cse-memssa -verify-memoryssa -disable-output +; RUN: opt < %s -early-cse-memssa -earlycse-debug-hash -verify-memoryssa -disable-output ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/EarlyCSE/and_or.ll b/llvm/test/Transforms/EarlyCSE/and_or.ll index 28161ddfb3c6c..b70d0cea3f7fb 100644 --- a/llvm/test/Transforms/EarlyCSE/and_or.ll +++ b/llvm/test/Transforms/EarlyCSE/and_or.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s define i32 @test_01(i32 %a, i32 %b) { diff --git a/llvm/test/Transforms/EarlyCSE/atomics.ll b/llvm/test/Transforms/EarlyCSE/atomics.ll index 4284265d0aec0..4a4b76666344a 100644 --- a/llvm/test/Transforms/EarlyCSE/atomics.ll +++ b/llvm/test/Transforms/EarlyCSE/atomics.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -early-cse | FileCheck %s +; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; CHECK-LABEL: @test12( diff --git a/llvm/test/Transforms/EarlyCSE/basic.ll b/llvm/test/Transforms/EarlyCSE/basic.ll index f69a8168602ac..5178e5a89e205 100644 --- a/llvm/test/Transforms/EarlyCSE/basic.ll +++ b/llvm/test/Transforms/EarlyCSE/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -early-cse | FileCheck %s +; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; RUN: opt < %s -S -passes=early-cse | FileCheck %s diff --git a/llvm/test/Transforms/EarlyCSE/conditional.ll b/llvm/test/Transforms/EarlyCSE/conditional.ll index ff0acac5a357d..c4b3277633fe0 100644 --- a/llvm/test/Transforms/EarlyCSE/conditional.ll +++ b/llvm/test/Transforms/EarlyCSE/conditional.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s ; Can we CSE a known condition to a constant? 
diff --git a/llvm/test/Transforms/EarlyCSE/const-speculation.ll b/llvm/test/Transforms/EarlyCSE/const-speculation.ll index 5b7f2f5b69829..a531c14da770c 100644 --- a/llvm/test/Transforms/EarlyCSE/const-speculation.ll +++ b/llvm/test/Transforms/EarlyCSE/const-speculation.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S %s | FileCheck %s %mystruct = type { i32 } diff --git a/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll b/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll index b0fb8ff75ad39..2d6c5380394fb 100644 --- a/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll +++ b/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll @@ -1,4 +1,4 @@ -; RUN: opt -S %s -early-cse | FileCheck %s +; RUN: opt -S %s -early-cse -earlycse-debug-hash | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll b/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll index 35d0fd184968e..20a9805302742 100644 --- a/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll +++ b/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S %s -o - | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S %s -o - | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; Function Attrs: nounwind uwtable diff --git a/llvm/test/Transforms/EarlyCSE/edge.ll b/llvm/test/Transforms/EarlyCSE/edge.ll index 88bd05d4ec348..bd82502c22290 100644 --- a/llvm/test/Transforms/EarlyCSE/edge.ll +++ b/llvm/test/Transforms/EarlyCSE/edge.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s ; Same as GVN/edge.ll, but updated to reflect EarlyCSE's less powerful ; implementation. 
EarlyCSE currently doesn't exploit equality comparisons diff --git a/llvm/test/Transforms/EarlyCSE/fence.ll b/llvm/test/Transforms/EarlyCSE/fence.ll index 0f53edc332bd3..8fb50849ff93a 100644 --- a/llvm/test/Transforms/EarlyCSE/fence.ll +++ b/llvm/test/Transforms/EarlyCSE/fence.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; NOTE: This file is testing the current implementation. Some of ; the transforms used as negative tests below would be legal, but diff --git a/llvm/test/Transforms/EarlyCSE/flags.ll b/llvm/test/Transforms/EarlyCSE/flags.ll index c3e74df624282..9d24ade7d3757 100644 --- a/llvm/test/Transforms/EarlyCSE/flags.ll +++ b/llvm/test/Transforms/EarlyCSE/flags.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s declare void @use(i1) diff --git a/llvm/test/Transforms/EarlyCSE/floatingpoint.ll b/llvm/test/Transforms/EarlyCSE/floatingpoint.ll index a4c56bf0059ae..a4293f5eed9c1 100644 --- a/llvm/test/Transforms/EarlyCSE/floatingpoint.ll +++ b/llvm/test/Transforms/EarlyCSE/floatingpoint.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -early-cse | FileCheck %s +; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; Ensure we don't simplify away additions vectors of +0.0's (same as scalars). 
diff --git a/llvm/test/Transforms/EarlyCSE/gc_relocate.ll b/llvm/test/Transforms/EarlyCSE/gc_relocate.ll index 3ec7e129ef523..435b081a9a20b 100644 --- a/llvm/test/Transforms/EarlyCSE/gc_relocate.ll +++ b/llvm/test/Transforms/EarlyCSE/gc_relocate.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s declare void @func() declare i32 @"personality_function"() diff --git a/llvm/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll b/llvm/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll index 57dbdd8831902..6e423eb93bf77 100644 --- a/llvm/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll +++ b/llvm/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -globals-aa -early-cse-memssa | FileCheck %s +; RUN: opt < %s -S -globals-aa -early-cse-memssa -earlycse-debug-hash | FileCheck %s define i16 @f1() readonly { ret i16 0 diff --git a/llvm/test/Transforms/EarlyCSE/guards.ll b/llvm/test/Transforms/EarlyCSE/guards.ll index 55ec46b186c6d..6a1bef9852061 100644 --- a/llvm/test/Transforms/EarlyCSE/guards.ll +++ b/llvm/test/Transforms/EarlyCSE/guards.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -early-cse < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s --check-prefixes=CHECK,NO_ASSUME ; RUN: opt < %s -S -basic-aa -early-cse-memssa --enable-knowledge-retention | FileCheck %s --check-prefixes=CHECK,USE_ASSUME diff --git a/llvm/test/Transforms/EarlyCSE/instsimplify-dom.ll b/llvm/test/Transforms/EarlyCSE/instsimplify-dom.ll index e9ba93a1852cb..2b281aa73e3c0 100644 --- 
a/llvm/test/Transforms/EarlyCSE/instsimplify-dom.ll +++ b/llvm/test/Transforms/EarlyCSE/instsimplify-dom.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s ; PR12231 diff --git a/llvm/test/Transforms/EarlyCSE/int_sideeffect.ll b/llvm/test/Transforms/EarlyCSE/int_sideeffect.ll index 1dccaab5e5f7d..f4d8fd25a63c3 100644 --- a/llvm/test/Transforms/EarlyCSE/int_sideeffect.ll +++ b/llvm/test/Transforms/EarlyCSE/int_sideeffect.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -early-cse | FileCheck %s +; RUN: opt -S < %s -early-cse -earlycse-debug-hash | FileCheck %s declare void @llvm.sideeffect() diff --git a/llvm/test/Transforms/EarlyCSE/invariant-loads.ll b/llvm/test/Transforms/EarlyCSE/invariant-loads.ll index c5246009b42b0..df76fd2d61bed 100644 --- a/llvm/test/Transforms/EarlyCSE/invariant-loads.ll +++ b/llvm/test/Transforms/EarlyCSE/invariant-loads.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -early-cse < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME ; RUN: opt -S -basic-aa -early-cse-memssa < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME ; RUN: opt -S -basic-aa -early-cse-memssa --enable-knowledge-retention < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME diff --git a/llvm/test/Transforms/EarlyCSE/invariant.start.ll b/llvm/test/Transforms/EarlyCSE/invariant.start.ll index 2202c09c1a0e9..a0e267d85569c 100644 --- a/llvm/test/Transforms/EarlyCSE/invariant.start.ll +++ b/llvm/test/Transforms/EarlyCSE/invariant.start.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt < %s -S -early-cse | FileCheck %s --check-prefixes=CHECK,NO_ASSUME +; RUN: opt < %s -S -early-cse 
-earlycse-debug-hash | FileCheck %s --check-prefixes=CHECK,NO_ASSUME ; RUN: opt < %s -S -early-cse --enable-knowledge-retention | FileCheck %s --check-prefixes=CHECK,USE_ASSUME ; RUN: opt < %s -S -passes=early-cse | FileCheck %s --check-prefixes=CHECK,NO_ASSUME diff --git a/llvm/test/Transforms/EarlyCSE/memoryssa.ll b/llvm/test/Transforms/EarlyCSE/memoryssa.ll index 5d33ed49bb1b4..3f670852a4d87 100644 --- a/llvm/test/Transforms/EarlyCSE/memoryssa.ll +++ b/llvm/test/Transforms/EarlyCSE/memoryssa.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -early-cse | FileCheck %s --check-prefix=CHECK-NOMEMSSA +; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s --check-prefix=CHECK-NOMEMSSA ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefix=CHECK-NOMEMSSA ; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='early-cse-memssa' | FileCheck %s diff --git a/llvm/test/Transforms/EarlyCSE/phi.ll b/llvm/test/Transforms/EarlyCSE/phi.ll index 2c2972be51041..e9c86ec5d3740 100644 --- a/llvm/test/Transforms/EarlyCSE/phi.ll +++ b/llvm/test/Transforms/EarlyCSE/phi.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; RUN: opt -basic-aa -early-cse-memssa -S < %s | FileCheck %s ; Most basic case, fully identical PHI nodes diff --git a/llvm/test/Transforms/EarlyCSE/pr33406.ll b/llvm/test/Transforms/EarlyCSE/pr33406.ll index 4d3312e1f0ac2..903b8bc9f2ace 100644 --- a/llvm/test/Transforms/EarlyCSE/pr33406.ll +++ b/llvm/test/Transforms/EarlyCSE/pr33406.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse-memssa -S %s | FileCheck %s +; RUN: opt -early-cse-memssa -earlycse-debug-hash -S %s | FileCheck %s ; CHECK: define void @patatino() { ; CHECK: for.cond: diff --git a/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll b/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll index 
47a513f2d6a67..d83a42780c647 100644 --- a/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll +++ b/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s declare void @readnone_may_unwind() readnone diff --git a/llvm/test/Transforms/EarlyCSE/writeonly.ll b/llvm/test/Transforms/EarlyCSE/writeonly.ll index 0a3cd1c7401ca..b28af8535083c 100644 --- a/llvm/test/Transforms/EarlyCSE/writeonly.ll +++ b/llvm/test/Transforms/EarlyCSE/writeonly.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s @var = global i32 undef declare void @foo() nounwind diff --git a/llvm/test/Transforms/GVNHoist/pr37445.ll b/llvm/test/Transforms/GVNHoist/pr37445.ll index 817fea14077d0..82cdced2c6129 100644 --- a/llvm/test/Transforms/GVNHoist/pr37445.ll +++ b/llvm/test/Transforms/GVNHoist/pr37445.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -early-cse-memssa -gvn-hoist -S | FileCheck %s +; RUN: opt < %s -early-cse-memssa -earlycse-debug-hash -gvn-hoist -S | FileCheck %s ; Make sure opt won't crash and that this pair of ; instructions (load, icmp) is hoisted successfully diff --git a/llvm/test/Transforms/GVNHoist/pr38807.ll b/llvm/test/Transforms/GVNHoist/pr38807.ll index f8c7f7e636379..0fcfd2180c681 100644 --- a/llvm/test/Transforms/GVNHoist/pr38807.ll +++ b/llvm/test/Transforms/GVNHoist/pr38807.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -early-cse-memssa -gvn-hoist -S | FileCheck %s +; RUN: opt < %s -early-cse-memssa -earlycse-debug-hash -gvn-hoist -S | FileCheck %s ; Make sure opt doesn't crash. On top of that, the instructions ; of the side blocks should be hoisted to the entry block. 
diff --git a/llvm/test/Transforms/Inline/inline-indirect-chain.ll b/llvm/test/Transforms/Inline/inline-indirect-chain.ll index bf73ad35dade7..46b62da848619 100644 --- a/llvm/test/Transforms/Inline/inline-indirect-chain.ll +++ b/llvm/test/Transforms/Inline/inline-indirect-chain.ll @@ -1,4 +1,4 @@ -; RUN: opt -inline -early-cse < %s +; RUN: opt -inline -early-cse -earlycse-debug-hash < %s ; This test used to crash (PR35469). define void @func1() { diff --git a/llvm/test/Transforms/InstCombine/invariant.group.ll b/llvm/test/Transforms/InstCombine/invariant.group.ll index f3774ad6c14d2..7a33bfd090773 100644 --- a/llvm/test/Transforms/InstCombine/invariant.group.ll +++ b/llvm/test/Transforms/InstCombine/invariant.group.ll @@ -1,4 +1,4 @@ -; RUN: opt -instcombine -early-cse -S < %s | FileCheck %s +; RUN: opt -instcombine -early-cse -earlycse-debug-hash -S < %s | FileCheck %s ; CHECK-LABEL: define i8* @simplifyNullLaunder() diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/allones.ll b/llvm/test/Transforms/InstSimplify/ConstProp/allones.ll index 1315b3628475e..2dc381d0949bd 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/allones.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/allones.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S -o - %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S -o - %s | FileCheck %s target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64-ni:2" target triple = "armv7-unknown-linux-gnueabi" diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll b/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll index 5950950171023..b16910b947405 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/math-1.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S -o - %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S -o - %s | FileCheck %s declare double 
@acos(double) define double @f_acos() { diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/math-2.ll b/llvm/test/Transforms/InstSimplify/ConstProp/math-2.ll index 2c6584bd597d9..b890fbfd1a2e5 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/math-2.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/math-2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -early-cse -S -o - %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S -o - %s | FileCheck %s declare double @atan2(double, double) define double @f_atan2() { diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll b/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll index 9ad794d6094b0..72a2abdbcf91f 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/rint.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s declare float @nearbyintf(float) #0 declare float @llvm.nearbyint.f32(float) #0 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/round.ll b/llvm/test/Transforms/InstSimplify/ConstProp/round.ll index d5b847810d5eb..6d9a89a690216 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/round.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/round.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s declare float @roundf(float) #0 declare float @llvm.round.f32(float) #0 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll b/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll index df13809171454..3548e95620be2 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/trunc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py -; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s declare float @truncf(float) #0 declare float @llvm.trunc.f32(float) #0 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll index 8e90961928c98..5d3d4a44c02c9 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-getelementptr.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64" diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll index 9c1f6730122e2..df9011636a35f 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll @@ -1,4 +1,4 @@ -; RUN: opt -early-cse -S < %s | FileCheck %s +; RUN: opt -early-cse -earlycse-debug-hash -S < %s | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64" diff --git a/llvm/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll b/llvm/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll index a08e07e9644a4..9a137fb3f3e92 100644 --- a/llvm/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll +++ b/llvm/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -nary-reassociate -early-cse -S | FileCheck %s +; RUN: opt < %s -nary-reassociate -early-cse -earlycse-debug-hash -S | FileCheck %s ; RUN: opt < %s -passes='nary-reassociate' -S | opt -early-cse -S | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" diff --git 
a/llvm/test/Transforms/Reassociate/cse-pairs.ll b/llvm/test/Transforms/Reassociate/cse-pairs.ll index 33397ea050c41..a920f49b59d78 100644 --- a/llvm/test/Transforms/Reassociate/cse-pairs.ll +++ b/llvm/test/Transforms/Reassociate/cse-pairs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -reassociate -early-cse -S < %s | FileCheck %s +; RUN: opt -reassociate -early-cse -earlycse-debug-hash -S < %s | FileCheck %s @num1 = local_unnamed_addr global i32 0, align 4 @num2 = local_unnamed_addr global i32 0, align 4 From 2966d6e80b77c97f65805920d4d3288885de53c9 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Fri, 4 Sep 2020 22:41:25 +0800 Subject: [PATCH 201/465] [MachOYAML] Allow handcrafting custom contents for DWARF sections. This patch enables users to handcraft custom contents for DWARF sections. If we specify the contents of DWARF sections both in the 'DWARF' entry and the 'content', yaml2obj will emit an error message. In addition, this patch helps remove the restriction that only the content of sections whose segname are __DWARF can be specified in the "DWARF" entry. 
Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D87126 --- llvm/lib/ObjectYAML/MachOEmitter.cpp | 22 +++++---- .../ObjectYAML/MachO/DWARF-debug_ranges.yaml | 4 -- .../MachO/dwarf-content-conflict.yaml | 45 +++++++++++++++++++ .../X86/verify_overlapping_cu_ranges.yaml | 4 -- .../ARM_AArch64/fat-macho-dwarf.yaml | 14 ------ .../tools/llvm-gsymutil/X86/mach-dwarf.yaml | 7 --- .../MachO/unrecognized-debug-section.yaml | 9 +--- 7 files changed, 60 insertions(+), 45 deletions(-) create mode 100644 llvm/test/ObjectYAML/MachO/dwarf-content-conflict.yaml diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp index ecae386fd1ba9..9eba391032b98 100644 --- a/llvm/lib/ObjectYAML/MachOEmitter.cpp +++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -285,16 +285,20 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) { return createStringError( errc::invalid_argument, "wrote too much data somewhere, section offsets don't line up"); - if (0 == strncmp(&Sec.segname[0], "__DWARF", sizeof(Sec.segname))) { - StringRef SectName(Sec.sectname, - strnlen(Sec.sectname, sizeof(Sec.sectname))); - if (Obj.DWARF.getNonEmptySectionNames().count(SectName.substr(2))) { - auto EmitFunc = - DWARFYAML::getDWARFEmitterByName(SectName.substr(2)); - if (Error Err = EmitFunc(OS, Obj.DWARF)) - return Err; - } + StringRef SectName(Sec.sectname, + strnlen(Sec.sectname, sizeof(Sec.sectname))); + // If the section's content is specified in the 'DWARF' entry, we will + // emit it regardless of the section's segname. 
+ if (Obj.DWARF.getNonEmptySectionNames().count(SectName.substr(2))) { + if (Sec.content) + return createStringError(errc::invalid_argument, + "cannot specify section '" + SectName + + "' contents in the 'DWARF' entry and " + "the 'content' at the same time"); + auto EmitFunc = DWARFYAML::getDWARFEmitterByName(SectName.substr(2)); + if (Error Err = EmitFunc(OS, Obj.DWARF)) + return Err; continue; } diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml index fced0be79d389..8948bf92b7d76 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml @@ -93,7 +93,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 011101030E1305110155170000022E00030E110112060000032E00030E11011201000000 - sectname: __debug_info segname: __DWARF addr: 0x0000000000000024 @@ -106,7 +105,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 520000000400000000000801010000000400000000000000000000000000020D000000000000000000000020000000031700000000000000000000003000000000000000022100000000100000000000000010000000 - sectname: __debug_ranges segname: __DWARF addr: 0x000000000000007A @@ -119,7 +117,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 0000000000000000200000000000000000000000000000003000000000000000FFFFFFFFFFFFFFFF00100000000000000000000000000000001000000000000000000000000000000000000000000000 - sectname: __debug_str segname: __DWARF addr: 0x00000000000000CA @@ -132,7 +129,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 002F746D702F6D61696E2E630073747269707065643100737472697070656432006D61696E00 - cmd: LC_SYMTAB cmdsize: 24 symoff: 0 diff --git a/llvm/test/ObjectYAML/MachO/dwarf-content-conflict.yaml b/llvm/test/ObjectYAML/MachO/dwarf-content-conflict.yaml new file mode 100644 index 
0000000000000..a4b2b4810c5a5 --- /dev/null +++ b/llvm/test/ObjectYAML/MachO/dwarf-content-conflict.yaml @@ -0,0 +1,45 @@ +## Test that yaml2obj emits an error message if we specify the DWARF section +## contents both in the 'DWARF' entry and in the 'content'. + +# RUN: not yaml2obj %s 2>&1 | FileCheck %s --check-prefix=CONFLICT + +# CONFLICT: yaml2obj: error: cannot specify section '__debug_str' contents in the 'DWARF' entry and the 'content' at the same time + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x0000000A + ncmds: 1 + sizeofcmds: 232 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DWARF + vmaddr: 0x00 + vmsize: 0x00 + fileoff: 0x00 + filesize: 0x00 + maxprot: 0 + initprot: 0 + nsects: 1 + flags: 0 + Sections: + - sectname: __debug_str + segname: __DWARF + addr: 0x00 + size: 12 + offset: 528 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 1234 +DWARF: + debug_str: [ a, abc ] diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_cu_ranges.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_cu_ranges.yaml index 0775de8907ead..b0970cdac8b24 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_cu_ranges.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_cu_ranges.yaml @@ -101,7 +101,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 011101030E1305110155170000022E00030E110112060000032E00030E11011201000000 - sectname: __debug_info segname: __DWARF addr: 0x0000000000000024 @@ -114,7 +113,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 
630000000400000000000801010000000200000000000000000000000000020D0000000000000000000000200000000317000000000000000000000030000000000000000221000000001000000000000000100000022600000000200000000000000010000000 - sectname: __debug_ranges segname: __DWARF addr: 0x000000000000008B @@ -127,7 +125,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: '0020000000000000003000000000000000000000000000002000000000000000000000000000000030000000000000000010000000000000002000000000000000000000000000000000000000000000' - sectname: __debug_str segname: __DWARF addr: 0x00000000000000DB @@ -140,7 +137,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 002F746D702F6D61696E2E630073747269707065643100737472697070656432006D61696E00666F6F00 - cmd: LC_SYMTAB cmdsize: 24 symoff: 0 diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/fat-macho-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/fat-macho-dwarf.yaml index d516b3704e71c..0315e18795bc0 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/fat-macho-dwarf.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/fat-macho-dwarf.yaml @@ -139,7 +139,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 37000000040020000000010101FB0E0D000101010100000001000001006D61696E2E6370700000000000000502F0BF00000105020A9F0206000101 - sectname: __debug_pubnames segname: __DWARF addr: 0x000000000000D03B @@ -152,7 +151,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 1700000002000000000077000000260000006D61696E0000000000 - sectname: __debug_pubtypes segname: __DWARF addr: 0x000000000000D056 @@ -165,7 +163,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 1F0000000200000000007700000059000000696E74006F000000636861720000000000 - sectname: __debug_aranges segname: __DWARF addr: 0x000000000000D079 @@ -178,7 +175,6 @@ Slices: reserved1: 0x00000000 
reserved2: 0x00000000 reserved3: 0x00000000 - content: 1C000000020000000000040000000000F0BF0000100000000000000000000000 - sectname: __debug_info segname: __DWARF addr: 0x000000000000D099 @@ -191,7 +187,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 73000000040000000000040101000000040031000000000000003A000000F0BF00001000000002F0BF00001000000001573F0000000101590000000103027D044400000001015900000003027D004900000001016000000000044E00000005040565000000056A000000066F0000000452000000060100 - sectname: __debug_abbrev segname: __DWARF addr: 0x000000000000D110 @@ -204,7 +199,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 011101250E1305030E10171B0EB44219110112060000022E01110112064018030E3A0B3B0B49103F19E37F0C00000305000218030E3A0B3B0B49100000042400030E3E0B0B0B0000050F00491000000626004910000000 - sectname: __debug_str segname: __DWARF addr: 0x000000000000D167 @@ -217,7 +211,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 004170706C6520636C616E672076657273696F6E2031312E302E302028636C616E672D313130302E302E33332E313729006D61696E2E637070002F746D70006D61696E0061726763006172677600696E74006368617200 - sectname: __apple_names segname: __DWARF addr: 0x000000000000D1BE @@ -614,7 +607,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 3B000000040020000000010101FB0E0D000101010100000001000001006D61696E2E63707000000000000009029C7F0000010000000105020AF3020C000101 - sectname: __debug_pubnames segname: __DWARF addr: 0x000000010000903F @@ -627,7 +619,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 170000000200000000007E0000002A0000006D61696E0000000000 - sectname: __debug_pubtypes segname: __DWARF addr: 0x000000010000905A @@ -640,7 +631,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 
1F0000000200000000007E00000060000000696E740076000000636861720000000000 - sectname: __debug_aranges segname: __DWARF addr: 0x000000010000907D @@ -653,7 +643,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 2C0000000200000000000800000000009C7F0000010000001C0000000000000000000000000000000000000000000000 - sectname: __debug_info segname: __DWARF addr: 0x00000001000090AD @@ -666,7 +655,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 7A000000040000000000080101000000040031000000000000003A0000009C7F0000010000001C000000029C7F0000010000001C000000016F3F0000000101600000000302910844000000010160000000030291004900000001016700000000044E0000000504056C000000057100000006760000000452000000060100 - sectname: __debug_abbrev segname: __DWARF addr: 0x000000010000912B @@ -679,7 +667,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 011101250E1305030E10171B0EB44219110112060000022E01110112064018030E3A0B3B0B49103F1900000305000218030E3A0B3B0B49100000042400030E3E0B0B0B0000050F00491000000626004910000000 - sectname: __debug_str segname: __DWARF addr: 0x000000010000917F @@ -692,7 +679,6 @@ Slices: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 004170706C6520636C616E672076657273696F6E2031312E302E302028636C616E672D313130302E302E33332E313729006D61696E2E637070002F746D70006D61696E0061726763006172677600696E74006368617200 - sectname: __apple_names segname: __DWARF addr: 0x00000001000091D6 diff --git a/llvm/test/tools/llvm-gsymutil/X86/mach-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/X86/mach-dwarf.yaml index a1d3a50c6c82f..8a358ea0700c2 100644 --- a/llvm/test/tools/llvm-gsymutil/X86/mach-dwarf.yaml +++ b/llvm/test/tools/llvm-gsymutil/X86/mach-dwarf.yaml @@ -215,7 +215,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 
66000000040020000000010101FB0E0D000101010100000001000001006D61696E2E6370700000000000000902700F00000100000001050C0A75050B063C05033C0204000101000902900F00000100000015050E0A083D050C63050B063C0506063F05023D0202000101 - sectname: __debug_pubnames segname: __DWARF addr: 0x000000010000206A @@ -228,7 +227,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 2B000000020000000000E00000002A0000005F5A33666F6F69002A000000666F6F00690000006D61696E0000000000 - sectname: __debug_pubtypes segname: __DWARF addr: 0x0000000100002099 @@ -241,7 +239,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 1F000000020000000000E000000062000000696E7400D8000000636861720000000000 - sectname: __debug_aranges segname: __DWARF addr: 0x00000001000020BC @@ -254,7 +251,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 3C000000020000000000080000000000700F0000010000001100000000000000900F000001000000250000000000000000000000000000000000000000000000 - sectname: __debug_info segname: __DWARF addr: 0x00000001000020FC @@ -267,7 +263,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: DC000000040000000000080101000000040031000000000000003A000000700F0000010000004500000002700F000001000000110000000156460000000302917C5600000000043F0000004700000001016200000001054B00000001016200000000064D000000050407900F000001000000250000000156510000000104620000000802917456000000010462000000080291685B0000000104C9000000090291644B0000000105620000000A46000000AA0F0000010000000600000001050302917C5600000000000BCE0000000BD30000000CD80000000660000000060100 - sectname: __debug_abbrev segname: __DWARF addr: 0x00000001000021DC @@ -280,7 +275,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 
011101250E1305030E10171B0EB44219110112060000022E0111011206401831100000030500021831100000042E016E0E030E3A0B3B0B49103F19200B0000050500030E3A0B3B0B49100000062400030E3E0B0B0B0000072E01110112064018030E3A0B3B0B49103F1900000805000218030E3A0B3B0B491000000934000218030E3A0B3B0B491000000A1D01311011011206580B590B00000B0F00491000000C26004910000000 - sectname: __debug_str segname: __DWARF addr: 0x0000000100002284 @@ -293,7 +287,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 004170706C6520636C616E672076657273696F6E2031312E302E302028636C616E672D313130302E302E33332E313729006D61696E2E637070002F746D70005F5A33666F6F6900666F6F006900696E74006D61696E00617267630061726776006368617200 - sectname: __apple_names segname: __DWARF addr: 0x00000001000022E9 diff --git a/llvm/test/tools/obj2yaml/MachO/unrecognized-debug-section.yaml b/llvm/test/tools/obj2yaml/MachO/unrecognized-debug-section.yaml index 161805b30834a..4583fdde27fb0 100644 --- a/llvm/test/tools/obj2yaml/MachO/unrecognized-debug-section.yaml +++ b/llvm/test/tools/obj2yaml/MachO/unrecognized-debug-section.yaml @@ -1,16 +1,11 @@ ## Test that macho2yaml dumps the __debug_foo section (unrecognized debug section) ## as a raw content section. -## Due to the current implementation of yaml2macho being buggy, we cannot generate a DWARF section -## where the sectname starts with '__debug_' and the segname is '__DWARF', from a raw content section. -## We've slightly modified the segname to be '__FOO'. macho2yaml will still treat it as a debug -## section. 
- # RUN: yaml2obj %s | obj2yaml | FileCheck %s --check-prefix=UNRECOGNIZED # UNRECOGNIZED: Sections: # UNRECOGNIZED-NEXT: - sectname: __debug_foo -# UNRECOGNIZED-NEXT: segname: __FOO +# UNRECOGNIZED-NEXT: segname: __DWARF # UNRECOGNIZED-NEXT: addr: 0x0000000000000000 # UNRECOGNIZED-NEXT: size: 5 # UNRECOGNIZED-NEXT: offset: 0x00000210 @@ -48,7 +43,7 @@ LoadCommands: flags: 0 Sections: - sectname: __debug_foo - segname: __FOO + segname: __DWARF addr: 0x00 size: 5 offset: 528 From dbf04aaade235a0d76c6ad549c091c9fd0ada0e8 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 4 Sep 2020 10:47:20 -0400 Subject: [PATCH 202/465] Revert "[Asan] Cleanup atomic usage in allocator" This reverts commit 8b8be6f38ab568d40869205389a002f32f6558a2 and follow-ups 99a93c3a223e3bfc9a9781bfbf98d2fd4551f923, a9c0bf04043462d43013bc5616aa48f6d3e16b88, 48ac5b4833b60f00f0923db11ea31e7316bc78c6. It breaks building on Windows, see https://reviews.llvm.org/D86917#2255872 --- compiler-rt/lib/asan/asan_allocator.cpp | 159 ++++++++---------- .../sanitizer_allocator_combined.h | 1 + .../sanitizer_allocator_primary64.h | 9 +- 3 files changed, 71 insertions(+), 98 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index f5c273e7fc25b..0e9add1ce7370 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -72,14 +72,14 @@ static const uptr kAllocBegMagic = 0xCC6E96B9; struct ChunkHeader { // 1-st 8 bytes. - atomic_uint8_t chunk_state; - u32 alloc_tid : 24; - - u32 free_tid : 24; - u32 from_memalign : 1; - u32 alloc_type : 2; - u32 rz_log : 3; - u32 lsan_tag : 2; + u32 chunk_state : 8; // Must be first. + u32 alloc_tid : 24; + + u32 free_tid : 24; + u32 from_memalign : 1; + u32 alloc_type : 2; + u32 rz_log : 3; + u32 lsan_tag : 2; // 2-nd 8 bytes // This field is used for small sizes. 
For large sizes it is equal to // SizeClassMap::kMaxSize and the actual size is stored in the @@ -88,7 +88,7 @@ struct ChunkHeader { // align < 8 -> 0 // else -> log2(min(align, 512)) - 2 u32 user_requested_alignment_log : 3; - atomic_uint32_t alloc_context_id; + u32 alloc_context_id; }; struct ChunkBase : ChunkHeader { @@ -101,15 +101,14 @@ static const uptr kChunkHeader2Size = sizeof(ChunkBase) - kChunkHeaderSize; COMPILER_CHECK(kChunkHeaderSize == 16); COMPILER_CHECK(kChunkHeader2Size <= 16); +// Every chunk of memory allocated by this allocator can be in one of 3 states: +// CHUNK_AVAILABLE: the chunk is in the free list and ready to be allocated. +// CHUNK_ALLOCATED: the chunk is allocated and not yet freed. +// CHUNK_QUARANTINE: the chunk was freed and put into quarantine zone. enum { - // Either just allocated by underlying allocator, but AsanChunk is not yet - // ready, or almost returned to undelying allocator and AsanChunk is already - // meaningless. - CHUNK_INVALID = 0, - // The chunk is allocated and not yet freed. - CHUNK_ALLOCATED = 2, - // The chunk was freed and put into quarantine zone. - CHUNK_QUARANTINE = 3, + CHUNK_AVAILABLE = 0, // 0 is the default value even if we didn't set it. 
+ CHUNK_ALLOCATED = 2, + CHUNK_QUARANTINE = 3 }; struct AsanChunk: ChunkBase { @@ -118,7 +117,7 @@ struct AsanChunk: ChunkBase { if (user_requested_size != SizeClassMap::kMaxSize) return user_requested_size; return *reinterpret_cast( - get_allocator().GetMetaData(AllocBeg(locked_version))); + get_allocator().GetMetaData(AllocBeg(locked_version))); } void *AllocBeg(bool locked_version = false) { if (from_memalign) { @@ -141,12 +140,8 @@ struct QuarantineCallback { } void Recycle(AsanChunk *m) { - u8 old_chunk_state = CHUNK_QUARANTINE; - if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, - CHUNK_INVALID, memory_order_acquire)) { - CHECK_EQ(old_chunk_state, CHUNK_QUARANTINE); - } - + CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); + atomic_store((atomic_uint8_t*)m, CHUNK_AVAILABLE, memory_order_relaxed); CHECK_NE(m->alloc_tid, kInvalidTid); CHECK_NE(m->free_tid, kInvalidTid); PoisonShadow(m->Beg(), @@ -306,25 +301,22 @@ struct Allocator { // housekeeping chunk, like TransferBatch. Start by assuming the former. AsanChunk *ac = GetAsanChunk((void *)chunk); uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)ac); - if (atomic_load(&ac->chunk_state, memory_order_acquire) == - CHUNK_ALLOCATED) { - uptr beg = ac->Beg(); - uptr end = ac->Beg() + ac->UsedSize(true); - uptr chunk_end = chunk + allocated_size; - if (chunk < beg && beg < end && end <= chunk_end) { - // Looks like a valid AsanChunk in use, poison redzones only. 
- PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); - uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); - FastPoisonShadowPartialRightRedzone( - end_aligned_down, end - end_aligned_down, - chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); - return; - } + uptr beg = ac->Beg(); + uptr end = ac->Beg() + ac->UsedSize(true); + uptr chunk_end = chunk + allocated_size; + if (chunk < beg && beg < end && end <= chunk_end && + ac->chunk_state == CHUNK_ALLOCATED) { + // Looks like a valid AsanChunk in use, poison redzones only. + PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); + uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); + FastPoisonShadowPartialRightRedzone( + end_aligned_down, end - end_aligned_down, + chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); + } else { + // This is either not an AsanChunk or freed or quarantined AsanChunk. + // In either case, poison everything. + PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); } - - // This is either not an AsanChunk or freed or quarantined AsanChunk. - // In either case, poison everything. - PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); } void ReInitialize(const AllocatorOptions &options) { @@ -389,17 +381,14 @@ struct Allocator { AsanChunk *right_chunk) { // Prefer an allocated chunk over freed chunk and freed chunk // over available chunk. 
- u8 left_state = atomic_load(&left_chunk->chunk_state, memory_order_relaxed); - u8 right_state = - atomic_load(&right_chunk->chunk_state, memory_order_relaxed); - if (left_state != right_state) { - if (left_state == CHUNK_ALLOCATED) + if (left_chunk->chunk_state != right_chunk->chunk_state) { + if (left_chunk->chunk_state == CHUNK_ALLOCATED) return left_chunk; - if (right_state == CHUNK_ALLOCATED) + if (right_chunk->chunk_state == CHUNK_ALLOCATED) return right_chunk; - if (left_state == CHUNK_QUARANTINE) + if (left_chunk->chunk_state == CHUNK_QUARANTINE) return left_chunk; - if (right_state == CHUNK_QUARANTINE) + if (right_chunk->chunk_state == CHUNK_QUARANTINE) return right_chunk; } // Same chunk_state: choose based on offset. @@ -414,10 +403,9 @@ struct Allocator { bool UpdateAllocationStack(uptr addr, BufferedStackTrace *stack) { AsanChunk *m = GetAsanChunkByAddr(addr); if (!m) return false; - if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) - return false; + if (m->chunk_state != CHUNK_ALLOCATED) return false; if (m->Beg() != addr) return false; - atomic_store(&m->alloc_context_id, StackDepotPut(*stack), + atomic_store((atomic_uint32_t *)&m->alloc_context_id, StackDepotPut(*stack), memory_order_relaxed); return true; } @@ -519,7 +507,7 @@ struct Allocator { m->free_tid = kInvalidTid; m->from_memalign = user_beg != beg_plus_redzone; if (alloc_beg != chunk_beg) { - CHECK_LE(alloc_beg + 2 * sizeof(uptr), chunk_beg); + CHECK_LE(alloc_beg+ 2 * sizeof(uptr), chunk_beg); reinterpret_cast(alloc_beg)[0] = kAllocBegMagic; reinterpret_cast(alloc_beg)[1] = chunk_beg; } @@ -536,8 +524,7 @@ struct Allocator { } m->user_requested_alignment_log = user_requested_alignment_log; - atomic_store(&m->alloc_context_id, StackDepotPut(*stack), - memory_order_relaxed); + m->alloc_context_id = StackDepotPut(*stack); uptr size_rounded_down_to_granularity = RoundDownTo(size, SHADOW_GRANULARITY); @@ -570,7 +557,7 @@ struct Allocator { : __lsan::kDirectlyLeaked; 
#endif // Must be the last mutation of metadata in this function. - atomic_store(&m->chunk_state, CHUNK_ALLOCATED, memory_order_release); + atomic_store((atomic_uint8_t *)m, CHUNK_ALLOCATED, memory_order_release); ASAN_MALLOC_HOOK(res, size); return res; } @@ -578,10 +565,10 @@ struct Allocator { // Set quarantine flag if chunk is allocated, issue ASan error report on // available and quarantined chunks. Return true on success, false otherwise. bool AtomicallySetQuarantineFlagIfAllocated(AsanChunk *m, void *ptr, - BufferedStackTrace *stack) { + BufferedStackTrace *stack) { u8 old_chunk_state = CHUNK_ALLOCATED; // Flip the chunk_state atomically to avoid race on double-free. - if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, + if (!atomic_compare_exchange_strong((atomic_uint8_t *)m, &old_chunk_state, CHUNK_QUARANTINE, memory_order_acquire)) { ReportInvalidFree(ptr, old_chunk_state, stack); @@ -595,8 +582,7 @@ struct Allocator { // Expects the chunk to already be marked as quarantined by using // AtomicallySetQuarantineFlagIfAllocated. void QuarantineChunk(AsanChunk *m, void *ptr, BufferedStackTrace *stack) { - CHECK_EQ(atomic_load(&m->chunk_state, memory_order_relaxed), - CHUNK_QUARANTINE); + CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); CHECK_GE(m->alloc_tid, 0); if (SANITIZER_WORDSIZE == 64) // On 32-bits this resides in user area. CHECK_EQ(m->free_tid, kInvalidTid); @@ -691,7 +677,7 @@ struct Allocator { void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC, true); if (new_ptr) { - u8 chunk_state = atomic_load(&m->chunk_state, memory_order_acquire); + u8 chunk_state = m->chunk_state; if (chunk_state != CHUNK_ALLOCATED) ReportInvalidFree(old_ptr, chunk_state, stack); CHECK_NE(REAL(memcpy), nullptr); @@ -735,8 +721,7 @@ struct Allocator { // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg). 
AsanChunk *GetAsanChunk(void *alloc_beg) { - if (!alloc_beg) - return nullptr; + if (!alloc_beg) return nullptr; if (!allocator.FromPrimary(alloc_beg)) { uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); AsanChunk *m = reinterpret_cast(meta[1]); @@ -752,13 +737,11 @@ struct Allocator { } AsanChunk *GetAsanChunkDebug(void *alloc_beg) { - if (!alloc_beg) - return nullptr; + if (!alloc_beg) return nullptr; if (!allocator.FromPrimary(alloc_beg)) { uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); AsanChunk *m = reinterpret_cast(meta[1]); - Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta, - m); + Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta, m); return m; } uptr *alloc_magic = reinterpret_cast(alloc_beg); @@ -771,6 +754,7 @@ struct Allocator { return reinterpret_cast(alloc_beg); } + AsanChunk *GetAsanChunkByAddr(uptr p) { void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast(p)); return GetAsanChunk(alloc_beg); @@ -786,16 +770,14 @@ struct Allocator { AsanChunk *GetAsanChunkByAddrFastLockedDebug(uptr p) { void *alloc_beg = allocator.GetBlockBeginFastLockedDebug(reinterpret_cast(p)); - Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p, - alloc_beg); + Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p, alloc_beg); return GetAsanChunkDebug(alloc_beg); } uptr AllocationSize(uptr p) { AsanChunk *m = GetAsanChunkByAddr(p); if (!m) return 0; - if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) - return 0; + if (m->chunk_state != CHUNK_ALLOCATED) return 0; if (m->Beg() != p) return 0; return m->UsedSize(); } @@ -861,16 +843,13 @@ static AsanAllocator &get_allocator() { } bool AsanChunkView::IsValid() const { - return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) != - CHUNK_INVALID; + return chunk_ && chunk_->chunk_state != CHUNK_AVAILABLE; } bool AsanChunkView::IsAllocated() const { - return chunk_ && 
atomic_load(&chunk_->chunk_state, memory_order_relaxed) == - CHUNK_ALLOCATED; + return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED; } bool AsanChunkView::IsQuarantined() const { - return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == - CHUNK_QUARANTINE; + return chunk_ && chunk_->chunk_state == CHUNK_QUARANTINE; } uptr AsanChunkView::Beg() const { return chunk_->Beg(); } uptr AsanChunkView::End() const { return Beg() + UsedSize(); } @@ -891,9 +870,7 @@ static StackTrace GetStackTraceFromId(u32 id) { return res; } -u32 AsanChunkView::GetAllocStackId() const { - return atomic_load(&chunk_->alloc_context_id, memory_order_relaxed); -} +u32 AsanChunkView::GetAllocStackId() const { return chunk_->alloc_context_id; } u32 AsanChunkView::GetFreeStackId() const { return chunk_->free_context_id; } StackTrace AsanChunkView::GetAllocStack() const { @@ -1058,7 +1035,7 @@ void AsanSoftRssLimitExceededCallback(bool limit_exceeded) { instance.SetRssLimitExceeded(limit_exceeded); } -} // namespace __asan +} // namespace __asan // --- Implementation of LSan-specific functions --- {{{1 namespace __lsan { @@ -1078,10 +1055,10 @@ void GetAllocatorGlobalRange(uptr *begin, uptr *end) { uptr PointsIntoChunk(void* p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(addr); - if (!m || atomic_load(&m->chunk_state, memory_order_acquire) != - __asan::CHUNK_ALLOCATED) - return 0; + if (!m) return 0; uptr chunk = m->Beg(); + if (m->chunk_state != __asan::CHUNK_ALLOCATED) + return 0; if (m->AddrIsInside(addr, /*locked_version=*/true)) return chunk; if (IsSpecialCaseOfOperatorNew0(chunk, m->UsedSize(/*locked_version*/ true), @@ -1095,8 +1072,7 @@ extern "C" SANITIZER_WEAK_ATTRIBUTE const char *__lsan_current_stage; void GetUserBeginDebug(uptr chunk) { Printf("GetUserBeginDebug1 chunk %p\n", chunk); - __asan::AsanChunk *m = - __asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); + __asan::AsanChunk *m = 
__asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); Printf("GetUserBeginDebug2 m %p\n", m); } @@ -1123,8 +1099,7 @@ LsanMetadata::LsanMetadata(uptr chunk) { bool LsanMetadata::allocated() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return atomic_load(&m->chunk_state, memory_order_relaxed) == - __asan::CHUNK_ALLOCATED; + return m->chunk_state == __asan::CHUNK_ALLOCATED; } ChunkTag LsanMetadata::tag() const { @@ -1144,7 +1119,7 @@ uptr LsanMetadata::requested_size() const { u32 LsanMetadata::stack_trace_id() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return atomic_load(&m->alloc_context_id, memory_order_relaxed); + return m->alloc_context_id; } void ForEachChunk(ForEachChunkCallback callback, void *arg) { @@ -1155,9 +1130,7 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddr(addr); if (!m) return kIgnoreObjectInvalid; - if ((atomic_load(&m->chunk_state, memory_order_acquire) == - __asan::CHUNK_ALLOCATED) && - m->AddrIsInside(addr)) { + if ((m->chunk_state == __asan::CHUNK_ALLOCATED) && m->AddrIsInside(addr)) { if (m->lsan_tag == kIgnored) return kIgnoreObjectAlreadyIgnored; m->lsan_tag = __lsan::kIgnored; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h index 0cf483da1e5c8..6d73784d77d09 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h @@ -148,6 +148,7 @@ class CombinedAllocator { return secondary_.GetBlockBeginFastLocked(p); } + uptr GetActuallyAllocatedSize(void *p) { if (primary_.PointerIsMine(p)) return primary_.GetActuallyAllocatedSize(p); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h index 
a6126fc6265eb..7af469c56fd6a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h @@ -203,8 +203,7 @@ class SizeClassAllocator64 { uptr class_id = GetSizeClass(p); uptr size = ClassIdToSize(class_id); Printf("GetBlockBeginDebug1 p %p class_id %p size %p\n", p, class_id, size); - if (!size) - return nullptr; + if (!size) return nullptr; uptr chunk_idx = GetChunkIdx((uptr)p, size); uptr reg_beg = GetRegionBegin(p); uptr beg = chunk_idx * size; @@ -213,16 +212,16 @@ class SizeClassAllocator64 { "GetBlockBeginDebug2 chunk_idx %p reg_beg %p beg %p next_beg %p " "kNumClasses %p\n", chunk_idx, reg_beg, beg, next_beg, kNumClasses); - if (class_id >= kNumClasses) - return nullptr; + if (class_id >= kNumClasses) return nullptr; const RegionInfo *region = AddressSpaceView::Load(GetRegionInfo(class_id)); Printf("GetBlockBeginDebug3 region %p region->mapped_user %p\n", region, region->mapped_user); if (region->mapped_user >= next_beg) - return reinterpret_cast(reg_beg + beg); + return reinterpret_cast(reg_beg + beg); return nullptr; } + uptr GetActuallyAllocatedSize(void *p) { CHECK(PointerIsMine(p)); return ClassIdToSize(GetSizeClass(p)); From fab60665ebc51b1b6c132e368fee00865d4d3942 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 4 Sep 2020 11:05:52 -0400 Subject: [PATCH 203/465] sanitizer_common: Implement COMPILER_CHECK using static_assert Since this is an internal header, we can just assume static_assert exists. If this doesn't upset any bots, I'll replace all uses of COMPILER_CHECK in a follow-up. 
--- compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h index d0ffc79b06107..84973eedda60a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h @@ -333,14 +333,10 @@ void NORETURN CheckFailed(const char *file, int line, const char *cond, #define UNIMPLEMENTED() UNREACHABLE("unimplemented") -#define COMPILER_CHECK(pred) IMPL_COMPILER_ASSERT(pred, __LINE__) +#define COMPILER_CHECK(pred) static_assert(pred, "") #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) -#define IMPL_PASTE(a, b) a##b -#define IMPL_COMPILER_ASSERT(pred, line) \ - typedef char IMPL_PASTE(assertion_failed_##_, line)[2*(int)(pred)-1] - // Limits for integral types. We have to redefine it in case we don't // have stdint.h (like in Visual Studio 9). #undef __INT64_C From 1ffcbe35ae0e136d7dd4cdd77eda306cc98b00e7 Mon Sep 17 00:00:00 2001 From: Muhammad Asif Manzoor Date: Fri, 4 Sep 2020 11:11:34 -0400 Subject: [PATCH 204/465] [AArch64][SVE] Add lowering for rounding operations Add the functionality to lower SVE rounding operations for passthru variant. Created a new test case file for all rounding operations. 
Reviewed By: paulwalker-arm Differential Revision: https://reviews.llvm.org/D86793 --- .../Target/AArch64/AArch64ISelLowering.cpp | 48 ++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 6 + .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 20 +- llvm/test/CodeGen/AArch64/sve-fp-rounding.ll | 485 ++++++++++++++++++ llvm/test/CodeGen/AArch64/sve-fp.ll | 63 --- 5 files changed, 552 insertions(+), 70 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-fp-rounding.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e1b79393f25f2..063644716a654 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -139,6 +139,12 @@ static bool isMergePassthruOpcode(unsigned Opc) { case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU: case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU: case AArch64ISD::FCEIL_MERGE_PASSTHRU: + case AArch64ISD::FFLOOR_MERGE_PASSTHRU: + case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU: + case AArch64ISD::FRINT_MERGE_PASSTHRU: + case AArch64ISD::FROUND_MERGE_PASSTHRU: + case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU: + case AArch64ISD::FTRUNC_MERGE_PASSTHRU: return true; } } @@ -976,6 +982,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEG, VT, Custom); setOperationAction(ISD::FSUB, VT, Custom); setOperationAction(ISD::FCEIL, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Custom); + setOperationAction(ISD::FNEARBYINT, VT, Custom); + setOperationAction(ISD::FRINT, VT, Custom); + setOperationAction(ISD::FROUND, VT, Custom); + setOperationAction(ISD::FROUNDEVEN, VT, Custom); + setOperationAction(ISD::FTRUNC, VT, Custom); } } @@ -1482,6 +1494,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU) + 
MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) MAKE_CASE(AArch64ISD::ADC) MAKE_CASE(AArch64ISD::SBC) @@ -3346,6 +3364,24 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_frintp: return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintm: + return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frinti: + return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintx: + return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frinta: + return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintn: + return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintz: + return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_convert_to_svbool: { EVT OutVT = Op.getValueType(); EVT InVT = Op.getOperand(1).getValueType(); @@ -3645,6 +3681,18 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU); case ISD::FCEIL: return 
LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU); + case ISD::FFLOOR: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU); + case ISD::FNEARBYINT: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU); + case ISD::FRINT: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU); + case ISD::FROUND: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU); + case ISD::FROUNDEVEN: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU); + case ISD::FTRUNC: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU); case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 38caa6a481418..d6e511891752a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -96,7 +96,13 @@ enum NodeType : unsigned { // Predicated instructions with the result of inactive lanes provided by the // last operand. 
FCEIL_MERGE_PASSTHRU, + FFLOOR_MERGE_PASSTHRU, + FNEARBYINT_MERGE_PASSTHRU, FNEG_MERGE_PASSTHRU, + FRINT_MERGE_PASSTHRU, + FROUND_MERGE_PASSTHRU, + FROUNDEVEN_MERGE_PASSTHRU, + FTRUNC_MERGE_PASSTHRU, SIGN_EXTEND_INREG_MERGE_PASSTHRU, ZERO_EXTEND_INREG_MERGE_PASSTHRU, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index eadf23dc46225..e01a34242a8d7 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -203,6 +203,12 @@ def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith> def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>; def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; @@ -1416,13 +1422,13 @@ multiclass sve_prefetch; defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>; - defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp, AArch64frintp_mt>; - defm FRINTM_ZPmZ : 
sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>; - defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>; - defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>; - defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>; - defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>; + defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", null_frag, AArch64frintn_mt>; + defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", null_frag, AArch64frintp_mt>; + defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", null_frag, AArch64frintm_mt>; + defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", null_frag, AArch64frintz_mt>; + defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", null_frag, AArch64frinta_mt>; + defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", null_frag, AArch64frintx_mt>; + defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", null_frag, AArch64frinti_mt>; defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>; defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>; diff --git a/llvm/test/CodeGen/AArch64/sve-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-fp-rounding.ll new file mode 100644 index 0000000000000..0a31271d3f8be --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fp-rounding.ll @@ -0,0 +1,485 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; FCEIL + +define @frintp_nxv8f16( %a) { +; CHECK-LABEL: frintp_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frintp z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv8f16( %a) + ret %res +} + +define @frintp_nxv4f16( %a) { +; CHECK-LABEL: frintp_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintp z0.h, p0/m, z0.h +; 
CHECK-NEXT: ret + %res = call @llvm.ceil.nxv4f16( %a) + ret %res +} + +define @frintp_nxv2f16( %a) { +; CHECK-LABEL: frintp_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintp z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv2f16( %a) + ret %res +} + +define @frintp_nxv4f32( %a) { +; CHECK-LABEL: frintp_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintp z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv4f32( %a) + ret %res +} + +define @frintp_nxv2f32( %a) { +; CHECK-LABEL: frintp_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintp z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv2f32( %a) + ret %res +} + +define @frintp_nxv2f64( %a) { +; CHECK-LABEL: frintp_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintp z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.ceil.nxv2f64( %a) + ret %res +} + +; FFLOOR + +define @frintm_nxv8f16( %a) { +; CHECK-LABEL: frintm_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frintm z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv8f16( %a) + ret %res +} + +define @frintm_nxv4f16( %a) { +; CHECK-LABEL: frintm_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintm z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv4f16( %a) + ret %res +} + +define @frintm_nxv2f16( %a) { +; CHECK-LABEL: frintm_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintm z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv2f16( %a) + ret %res +} + +define @frintm_nxv4f32( %a) { +; CHECK-LABEL: frintm_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintm z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv4f32( %a) + ret %res +} + +define @frintm_nxv2f32( %a) { +; CHECK-LABEL: frintm_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintm z0.s, p0/m, 
z0.s +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv2f32( %a) + ret %res +} + +define @frintm_nxv2f64( %a) { +; CHECK-LABEL: frintm_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintm z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.floor.nxv2f64( %a) + ret %res +} + +; FNEARBYINT + +define @frinti_nxv8f16( %a) { +; CHECK-LABEL: frinti_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frinti z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv8f16( %a) + ret %res +} + +define @frinti_nxv4f16( %a) { +; CHECK-LABEL: frinti_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frinti z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv4f16( %a) + ret %res +} + +define @frinti_nxv2f16( %a) { +; CHECK-LABEL: frinti_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinti z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv2f16( %a) + ret %res +} + +define @frinti_nxv4f32( %a) { +; CHECK-LABEL: frinti_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frinti z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv4f32( %a) + ret %res +} + +define @frinti_nxv2f32( %a) { +; CHECK-LABEL: frinti_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinti z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv2f32( %a) + ret %res +} + +define @frinti_nxv2f64( %a) { +; CHECK-LABEL: frinti_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinti z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.nearbyint.nxv2f64( %a) + ret %res +} + +; FRINT + +define @frintx_nxv8f16( %a) { +; CHECK-LABEL: frintx_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv8f16( %a) + ret %res +} + +define @frintx_nxv4f16( %a) { +; CHECK-LABEL: frintx_nxv4f16: +; CHECK: // %bb.0: +; 
CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv4f16( %a) + ret %res +} + +define @frintx_nxv2f16( %a) { +; CHECK-LABEL: frintx_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv2f16( %a) + ret %res +} + +define @frintx_nxv4f32( %a) { +; CHECK-LABEL: frintx_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv4f32( %a) + ret %res +} + +define @frintx_nxv2f32( %a) { +; CHECK-LABEL: frintx_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv2f32( %a) + ret %res +} + +define @frintx_nxv2f64( %a) { +; CHECK-LABEL: frintx_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.rint.nxv2f64( %a) + ret %res +} + +; ROUND + +define @frinta_nxv8f16( %a) { +; CHECK-LABEL: frinta_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frinta z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.round.nxv8f16( %a) + ret %res +} + +define @frinta_nxv4f16( %a) { +; CHECK-LABEL: frinta_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frinta z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.round.nxv4f16( %a) + ret %res +} + +define @frinta_nxv2f16( %a) { +; CHECK-LABEL: frinta_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinta z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.round.nxv2f16( %a) + ret %res +} + +define @frinta_nxv4f32( %a) { +; CHECK-LABEL: frinta_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frinta z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.round.nxv4f32( %a) + ret %res +} + +define @frinta_nxv2f32( %a) { +; CHECK-LABEL: frinta_nxv2f32: +; CHECK: // 
%bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinta z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.round.nxv2f32( %a) + ret %res +} + +define @frinta_nxv2f64( %a) { +; CHECK-LABEL: frinta_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frinta z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.round.nxv2f64( %a) + ret %res +} + +; ROUNDEVEN + +define @frintn_nxv8f16( %a) { +; CHECK-LABEL: frintn_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frintn z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv8f16( %a) + ret %res +} + +define @frintn_nxv4f16( %a) { +; CHECK-LABEL: frintn_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintn z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv4f16( %a) + ret %res +} + +define @frintn_nxv2f16( %a) { +; CHECK-LABEL: frintn_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintn z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv2f16( %a) + ret %res +} + +define @frintn_nxv4f32( %a) { +; CHECK-LABEL: frintn_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintn z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv4f32( %a) + ret %res +} + +define @frintn_nxv2f32( %a) { +; CHECK-LABEL: frintn_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintn z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv2f32( %a) + ret %res +} + +define @frintn_nxv2f64( %a) { +; CHECK-LABEL: frintn_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintn z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.roundeven.nxv2f64( %a) + ret %res +} + +; FTRUNC + +define @frintz_nxv8f16( %a) { +; CHECK-LABEL: frintz_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: frintz z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv8f16( %a) + ret %res +} + +define @frintz_nxv4f16( 
%a) { +; CHECK-LABEL: frintz_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintz z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv4f16( %a) + ret %res +} + +define @frintz_nxv2f16( %a) { +; CHECK-LABEL: frintz_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintz z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv2f16( %a) + ret %res +} + +define @frintz_nxv4f32( %a) { +; CHECK-LABEL: frintz_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: frintz z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv4f32( %a) + ret %res +} + +define @frintz_nxv2f32( %a) { +; CHECK-LABEL: frintz_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintz z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv2f32( %a) + ret %res +} + +define @frintz_nxv2f64( %a) { +; CHECK-LABEL: frintz_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintz z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %res = call @llvm.trunc.nxv2f64( %a) + ret %res +} + +declare @llvm.ceil.nxv8f16( ) +declare @llvm.ceil.nxv4f16( ) +declare @llvm.ceil.nxv2f16( ) +declare @llvm.ceil.nxv4f32() +declare @llvm.ceil.nxv2f32() +declare @llvm.ceil.nxv2f64() + +declare @llvm.floor.nxv8f16( ) +declare @llvm.floor.nxv4f16( ) +declare @llvm.floor.nxv2f16( ) +declare @llvm.floor.nxv4f32() +declare @llvm.floor.nxv2f32() +declare @llvm.floor.nxv2f64() + +declare @llvm.nearbyint.nxv8f16( ) +declare @llvm.nearbyint.nxv4f16( ) +declare @llvm.nearbyint.nxv2f16( ) +declare @llvm.nearbyint.nxv4f32() +declare @llvm.nearbyint.nxv2f32() +declare @llvm.nearbyint.nxv2f64() + +declare @llvm.rint.nxv8f16( ) +declare @llvm.rint.nxv4f16( ) +declare @llvm.rint.nxv2f16( ) +declare @llvm.rint.nxv4f32() +declare @llvm.rint.nxv2f32() +declare @llvm.rint.nxv2f64() + +declare @llvm.round.nxv8f16( ) +declare @llvm.round.nxv4f16( ) +declare @llvm.round.nxv2f16( ) +declare @llvm.round.nxv4f32() +declare 
@llvm.round.nxv2f32() +declare @llvm.round.nxv2f64() + +declare @llvm.roundeven.nxv8f16( ) +declare @llvm.roundeven.nxv4f16( ) +declare @llvm.roundeven.nxv2f16( ) +declare @llvm.roundeven.nxv4f32() +declare @llvm.roundeven.nxv2f32() +declare @llvm.roundeven.nxv2f64() + +declare @llvm.trunc.nxv8f16( ) +declare @llvm.trunc.nxv4f16( ) +declare @llvm.trunc.nxv2f16( ) +declare @llvm.trunc.nxv4f32() +declare @llvm.trunc.nxv2f32() +declare @llvm.trunc.nxv2f64() diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll index 2afecdfc826d4..e4aea2847bc4c 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp.ll @@ -480,62 +480,6 @@ define void @float_copy(* %P1, * %P2) { ret void } -; FCEIL - -define @frintp_nxv8f16( %a) { -; CHECK-LABEL: frintp_nxv8f16: -; CHECK: ptrue p0.h -; CHECK-NEXT: frintp z0.h, p0/m, z0.h -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv8f16( %a) - ret %res -} - -define @frintp_nxv4f16( %a) { -; CHECK-LABEL: frintp_nxv4f16: -; CHECK: ptrue p0.s -; CHECK-NEXT: frintp z0.h, p0/m, z0.h -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv4f16( %a) - ret %res -} - -define @frintp_nxv2f16( %a) { -; CHECK-LABEL: frintp_nxv2f16: -; CHECK: ptrue p0.d -; CHECK-NEXT: frintp z0.h, p0/m, z0.h -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv2f16( %a) - ret %res -} - -define @frintp_nxv4f32( %a) { -; CHECK-LABEL: frintp_nxv4f32: -; CHECK: ptrue p0.s -; CHECK-NEXT: frintp z0.s, p0/m, z0.s -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv4f32( %a) - ret %res -} - -define @frintp_nxv2f32( %a) { -; CHECK-LABEL: frintp_nxv2f32: -; CHECK: ptrue p0.d -; CHECK-NEXT: frintp z0.s, p0/m, z0.s -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv2f32( %a) - ret %res -} - -define @frintp_nxv2f64( %a) { -; CHECK-LABEL: frintp_nxv2f64: -; CHECK: ptrue p0.d -; CHECK-NEXT: frintp z0.d, p0/m, z0.d -; CHECK-NEXT: ret - %res = call @llvm.ceil.nxv2f64( %a) - ret %res -} - declare @llvm.aarch64.sve.frecps.x.nxv8f16(, ) declare 
@llvm.aarch64.sve.frecps.x.nxv4f32( , ) declare @llvm.aarch64.sve.frecps.x.nxv2f64(, ) @@ -551,12 +495,5 @@ declare @llvm.fma.nxv8f16(, @llvm.fma.nxv4f16(, , ) declare @llvm.fma.nxv2f16(, , ) -declare @llvm.ceil.nxv8f16( ) -declare @llvm.ceil.nxv4f16( ) -declare @llvm.ceil.nxv2f16( ) -declare @llvm.ceil.nxv4f32() -declare @llvm.ceil.nxv2f32() -declare @llvm.ceil.nxv2f64() - ; Function Attrs: nounwind readnone declare double @llvm.aarch64.sve.faddv.nxv2f64(, ) #2 From 8c810acc94ed462238242c04c75ab33fc96da6e8 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 4 Sep 2020 16:02:26 +0100 Subject: [PATCH 205/465] [clang-format] Parse __ptr32/__ptr64 as a pointer qualifier Before: x = (foo *__ptr32) * v; MACRO(A * __ptr32 a); x = (foo *__ptr64) * v; MACRO(A * __ptr64 a); After: x = (foo *__ptr32)*v; MACRO(A *__ptr32 a); x = (foo *__ptr64)*v; MACRO(A *__ptr64 a); Depends on D86721 (to apply cleanly) Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D86775 --- clang/lib/Format/FormatToken.h | 2 +- clang/unittests/Format/FormatTest.cpp | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index a54600a478a46..ad72a95062abe 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -442,7 +442,7 @@ struct FormatToken { bool canBePointerOrReferenceQualifier() const { return isOneOf(tok::kw_const, tok::kw_restrict, tok::kw_volatile, tok::kw___attribute, tok::kw__Nonnull, tok::kw__Nullable, - tok::kw__Null_unspecified); + tok::kw__Null_unspecified, tok::kw___ptr32, tok::kw___ptr64); } /// Determine whether the token is a simple-type-specifier. 
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 14c97784b7385..716fe2bf50ae7 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -8028,6 +8028,8 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("vector v;"); verifyFormat("vector v;"); verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); verifyFormat("vector v;"); verifyFormat("vector v;"); verifyFormat("foo();"); @@ -8070,6 +8072,8 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyIndependentOfContext("MACRO(A *__attribute((foo)) a);"); verifyIndependentOfContext("MACRO(A *[[clang::attr]] a);"); verifyIndependentOfContext("MACRO(A *[[clang::attr(\"foo\")]] a);"); + verifyIndependentOfContext("MACRO(A *__ptr32 a);"); + verifyIndependentOfContext("MACRO(A *__ptr64 a);"); verifyIndependentOfContext("MACRO('0' <= c && c <= '9');"); verifyFormat("void f() { f(float{1}, a * a); }"); // FIXME: Is there a way to make this work? @@ -8141,6 +8145,8 @@ TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { verifyFormat("x = (foo *_Nonnull)*v;"); verifyFormat("x = (foo *[[clang::attr]])*v;"); verifyFormat("x = (foo *[[clang::attr(\"foo\")]])*v;"); + verifyFormat("x = (foo *__ptr32)*v;"); + verifyFormat("x = (foo *__ptr64)*v;"); // Check that we handle multiple trailing qualifiers and skip them all to // determine that the expression is a cast to a pointer type. 
@@ -8149,7 +8155,7 @@ TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { LongPointerLeft.PointerAlignment = FormatStyle::PAS_Left; StringRef AllQualifiers = "const volatile restrict __attribute__((foo)) _Nonnull _Null_unspecified " - "_Nonnull [[clang::attr]]"; + "_Nonnull [[clang::attr]] __ptr32 __ptr64"; verifyFormat(("x = (foo *" + AllQualifiers + ")*v;").str(), LongPointerRight); verifyFormat(("x = (foo* " + AllQualifiers + ")*v;").str(), LongPointerLeft); From e0ff5a8410ea58ba3d2e75791789a28ce976a7e7 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 4 Sep 2020 16:11:48 +0100 Subject: [PATCH 206/465] [clang-format] Add a test showing the current config file list parsing Currently clang-format starts overriding the default values at index 0 (keeping the existing values) instead of appending or replacing all values. This patch simply checks the current (IMO surprising) behaviour and does not attempt to change it. Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D86941 --- .../test/Format/dump-config-list-override.cpp | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 clang/test/Format/dump-config-list-override.cpp diff --git a/clang/test/Format/dump-config-list-override.cpp b/clang/test/Format/dump-config-list-override.cpp new file mode 100644 index 0000000000000..df4c6ad1333ef --- /dev/null +++ b/clang/test/Format/dump-config-list-override.cpp @@ -0,0 +1,24 @@ +/// Check that the ForEachMacros, etc. config entries replace default values instead of appending +/// FIXME: clang-format currently start overriding at index 0 (keeping the remaining +/// values) instead of either appending or completely replacing the values. +/// This behaviour is highly confusing. For now this test documents the current state. 
+// RUN: clang-format -style="{BasedOnStyle: LLVM}" -dump-config %s | \ +// RUN: FileCheck %s --check-prefixes=CHECK,DEFAULT +// RUN: clang-format -style="{BasedOnStyle: LLVM, ForEachMacros: ['OVERRIDE_FOREACH']}" -dump-config %s | \ +// RUN: FileCheck %s --check-prefixes=CHECK,OVERRIDE,FIXME-SHOULD-NOT-BE +// RUN: clang-format -style="{BasedOnStyle: LLVM, ForEachMacros: ['M1', 'M2', 'M3', 'M4']}" -dump-config %s | \ +// RUN: FileCheck %s --check-prefixes=CHECK,MORE-ENTRIES-THAN-DEFAULT + + +// CHECK-LABEL: ForEachMacros: +// DEFAULT-NEXT: {{^ }}- foreach +// DEFAULT-NEXT: {{^ }}- Q_FOREACH +// DEFAULT-NEXT: {{^ }}- BOOST_FOREACH +// OVERRIDE-NEXT: {{^ }}- OVERRIDE_FOREACH +// FIXME-SHOULD-NOT-BE-NEXT: {{^ }}- Q_FOREACH +// FIXME-SHOULD-NOT-BE-NEXT: {{^ }}- BOOST_FOREACH +// MORE-ENTRIES-THAN-DEFAULT-NEXT: {{^ }}- M1 +// MORE-ENTRIES-THAN-DEFAULT-NEXT: {{^ }}- M2 +// MORE-ENTRIES-THAN-DEFAULT-NEXT: {{^ }}- M3 +// MORE-ENTRIES-THAN-DEFAULT-NEXT: {{^ }}- M4 +// CHECK-NEXT: {{^[F-Z]}} From 2108bceceb5e6eca361aaa6b10441d83bd9edc1b Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 4 Sep 2020 16:13:29 +0100 Subject: [PATCH 207/465] FormatTest: Provide real line number in failure messages Currently a test failure always reports a line number inside verifyFormat() which is not very helpful to see which test failed. With this change we now emit the line number where the verify function was called. When using an IDE such as CLion, the output now includes a clickable link that points to the call site. 
Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D86926 --- clang/unittests/Format/FormatTest.cpp | 38 +++++++++++++++++---------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 716fe2bf50ae7..a2d694947990f 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -20,6 +20,7 @@ using clang::tooling::ReplacementTest; using clang::tooling::toReplacements; +using testing::internal::ScopedTrace; namespace clang { namespace format { @@ -65,8 +66,10 @@ class FormatTest : public ::testing::Test { return getStyleWithColumns(getGoogleStyle(), ColumnLimit); } - void verifyFormat(llvm::StringRef Expected, llvm::StringRef Code, - const FormatStyle &Style = getLLVMStyle()) { + void _verifyFormat(const char *File, int Line, llvm::StringRef Expected, + llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle()) { + ScopedTrace t(File, Line, ::testing::Message() << Code.str()); EXPECT_EQ(Expected.str(), format(Expected, Style)) << "Expected code is not stable"; EXPECT_EQ(Expected.str(), format(Code, Style)); @@ -79,24 +82,24 @@ class FormatTest : public ::testing::Test { } } - void verifyFormat(llvm::StringRef Code, - const FormatStyle &Style = getLLVMStyle()) { - verifyFormat(Code, test::messUp(Code), Style); + void _verifyFormat(const char *File, int Line, llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle()) { + _verifyFormat(File, Line, Code, test::messUp(Code), Style); } - void verifyIncompleteFormat(llvm::StringRef Code, - const FormatStyle &Style = getLLVMStyle()) { + void _verifyIncompleteFormat(const char *File, int Line, llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle()) { + ScopedTrace t(File, Line, ::testing::Message() << Code.str()); EXPECT_EQ(Code.str(), format(test::messUp(Code), Style, SC_ExpectIncomplete)); } - void verifyGoogleFormat(llvm::StringRef Code) { - 
verifyFormat(Code, getGoogleStyle()); - } - - void verifyIndependentOfContext(llvm::StringRef text) { - verifyFormat(text); - verifyFormat(llvm::Twine("void f() { " + text + " }").str()); + void _verifyIndependentOfContext(const char *File, int Line, + llvm::StringRef Text, + const FormatStyle &Style = getLLVMStyle()) { + _verifyFormat(File, Line, Text, Style); + _verifyFormat(File, Line, llvm::Twine("void f() { " + Text + " }").str(), + Style); } /// \brief Verify that clang-format does not crash on the given input. @@ -108,6 +111,13 @@ class FormatTest : public ::testing::Test { int ReplacementCount; }; +#define verifyIndependentOfContext(...) \ + _verifyIndependentOfContext(__FILE__, __LINE__, __VA_ARGS__) +#define verifyIncompleteFormat(...) \ + _verifyIncompleteFormat(__FILE__, __LINE__, __VA_ARGS__) +#define verifyFormat(...) _verifyFormat(__FILE__, __LINE__, __VA_ARGS__) +#define verifyGoogleFormat(Code) verifyFormat(Code, getGoogleStyle()) + TEST_F(FormatTest, MessUp) { EXPECT_EQ("1 2 3", test::messUp("1 2 3")); EXPECT_EQ("1 2 3\n", test::messUp("1\n2\n3\n")); From 45c3560384814d04c9813e644efa8e2155ecae52 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Thu, 3 Sep 2020 23:29:21 -0700 Subject: [PATCH 208/465] [HeapProf] Address post-review comments in instrumentation code Addresses post-review comments from D85948, which can be found here: https://reviews.llvm.org/rG7ed8124d46f9. 
--- clang/include/clang/Basic/CodeGenOptions.def | 2 +- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/SanitizerArgs.cpp | 4 ++-- clang/lib/Driver/ToolChains/Clang.cpp | 5 +++-- clang/lib/Frontend/CompilerInvocation.cpp | 2 +- clang/test/Driver/fmemprof.cpp | 8 ++++---- .../llvm/Transforms/Instrumentation/HeapProfiler.h | 4 +--- llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp | 8 ++++---- 8 files changed, 17 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 8b89aac8d6d5f..ec77f68062e7a 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -145,7 +145,7 @@ CODEGENOPT(IncrementalLinkerCompatible, 1, 0) ///< Emit an object file which can ///< linker. CODEGENOPT(MergeAllConstants , 1, 1) ///< Merge identical constants. CODEGENOPT(MergeFunctions , 1, 0) ///< Set when -fmerge-functions is enabled. -CODEGENOPT(HeapProf , 1, 0) ///< Set when -fmemprof is enabled. +CODEGENOPT(HeapProf , 1, 0) ///< Set when -fmemory-profile is enabled. CODEGENOPT(MSVolatile , 1, 0) ///< Set when /volatile:ms is enabled. CODEGENOPT(NoCommon , 1, 0) ///< Set when -fno-common or C++ is enabled. CODEGENOPT(NoDwarfDirectoryAsm , 1, 0) ///< Set when -fno-dwarf-directory-asm is diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 912192660c14f..5f1668e701f14 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -995,7 +995,7 @@ defm cxx_static_destructors : OptOutFFlag<"c++-static-destructors", "", def fsymbol_partition_EQ : Joined<["-"], "fsymbol-partition=">, Group, Flags<[CC1Option]>; -defm memprof : OptInFFlag<"memprof", "Enable", "Disable", " heap memory profiling">; +defm memory_profile : OptInFFlag<"memory-profile", "Enable", "Disable", " heap memory profiling">; // Begin sanitizer flags. 
These should all be core options exposed in all driver // modes. diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index cce0eb557a9c6..0f51443010ca4 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -866,8 +866,8 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, LinkCXXRuntimes) || D.CCCIsCXX(); - NeedsHeapProfRt = - Args.hasFlag(options::OPT_fmemprof, options::OPT_fno_memprof, false); + NeedsHeapProfRt = Args.hasFlag(options::OPT_fmemory_profile, + options::OPT_fno_memory_profile, false); // Finally, initialize the set of available and recoverable sanitizers. Sanitizers.Mask |= Kinds; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index bd5a89c2360cd..1680f2ad91ea2 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4224,8 +4224,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.getLastArg(options::OPT_save_temps_EQ)) Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ); - if (Args.hasFlag(options::OPT_fmemprof, options::OPT_fno_memprof, false)) - Args.AddLastArg(CmdArgs, options::OPT_fmemprof); + if (Args.hasFlag(options::OPT_fmemory_profile, + options::OPT_fno_memory_profile, false)) + Args.AddLastArg(CmdArgs, options::OPT_fmemory_profile); // Embed-bitcode option. // Only white-listed flags below are allowed to be embedded. 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 9143dd6ca2576..fbccff11562c1 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1033,7 +1033,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.ThinLinkBitcodeFile = std::string(Args.getLastArgValue(OPT_fthin_link_bitcode_EQ)); - Opts.HeapProf = Args.hasArg(OPT_fmemprof); + Opts.HeapProf = Args.hasArg(OPT_fmemory_profile); Opts.MSVolatile = Args.hasArg(OPT_fms_volatile); diff --git a/clang/test/Driver/fmemprof.cpp b/clang/test/Driver/fmemprof.cpp index 049067803e2b4..a2b740e1e6e5e 100644 --- a/clang/test/Driver/fmemprof.cpp +++ b/clang/test/Driver/fmemprof.cpp @@ -1,6 +1,6 @@ -// RUN: %clangxx -target x86_64-linux-gnu -fmemprof %s -### 2>&1 | FileCheck %s -// RUN: %clangxx -target x86_64-linux-gnu -fmemprof -fno-memprof %s -### 2>&1 | FileCheck %s --check-prefix=OFF -// CHECK: "-cc1" {{.*}} "-fmemprof" +// RUN: %clangxx -target x86_64-linux-gnu -fmemory-profile %s -### 2>&1 | FileCheck %s +// RUN: %clangxx -target x86_64-linux-gnu -fmemory-profile -fno-memory-profile %s -### 2>&1 | FileCheck %s --check-prefix=OFF +// CHECK: "-cc1" {{.*}} "-fmemory-profile" // CHECK: ld{{.*}}libclang_rt.heapprof{{.*}}libclang_rt.heapprof_cxx -// OFF-NOT: "-fmemprof" +// OFF-NOT: "-fmemory-profile" // OFF-NOT: libclang_rt.heapprof diff --git a/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h index af905bbecad8f..21943616c5e1b 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h @@ -1,4 +1,4 @@ -//===--------- Definition of the HeapProfiler class ---------*- C++ -*-===// +//===--------- Definition of the HeapProfiler class -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -38,8 +38,6 @@ class 
ModuleHeapProfilerPass : public PassInfoMixin { public: explicit ModuleHeapProfilerPass(); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - -private: }; // Insert HeapProfiler instrumentation diff --git a/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp b/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp index 6372dfded82a7..5f8671d7d88fc 100644 --- a/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp @@ -1,5 +1,4 @@ -//===- HeapProfiler.cpp - heap allocation and access profiler -//--------------===// +//===- HeapProfiler.cpp - heap allocation and access profiler -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -164,7 +163,8 @@ class HeapProfiler { /// If it is an interesting memory access, populate information /// about the access and return a InterestingMemoryAccess struct. /// Otherwise return None. - Optional isInterestingMemoryAccess(Instruction *I); + Optional + isInterestingMemoryAccess(Instruction *I) const; void instrumentMop(Instruction *I, const DataLayout &DL, InterestingMemoryAccess &Access); @@ -321,7 +321,7 @@ void HeapProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) { } Optional -HeapProfiler::isInterestingMemoryAccess(Instruction *I) { +HeapProfiler::isInterestingMemoryAccess(Instruction *I) const { // Do not instrument the load fetching the dynamic shadow address. if (DynamicShadowOffset == I) return None; From 4eef14f9780d9fc9a88096a3cabd669bcfa02bbc Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 4 Sep 2020 09:05:13 -0700 Subject: [PATCH 209/465] [OpenMPOpt] Assume indirect call always changes ICV When checking call sites, give special handling to indirect call, as the callee may be unknown and can lead to nullptr dereference later. Assume conservatively that the ICV always changes in such case. 
Reviewed By: sstefan1 Differential Revision: https://reviews.llvm.org/D87104 --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 3 +++ llvm/test/Transforms/OpenMP/icv_tracking.ll | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 8dfe42ebc27b5..3804a4bb79214 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1480,6 +1480,9 @@ struct AAICVTrackerFunction : public AAICVTracker { auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; Function *CalledFunction = CB->getCalledFunction(); + // Indirect call, assume ICV changes. + if (CalledFunction == nullptr) + return nullptr; if (CalledFunction == GetterRFI.Declaration) return None; if (CalledFunction == SetterRFI.Declaration) { diff --git a/llvm/test/Transforms/OpenMP/icv_tracking.ll b/llvm/test/Transforms/OpenMP/icv_tracking.ll index 19b55cc661b00..560ad2fbcd3de 100644 --- a/llvm/test/Transforms/OpenMP/icv_tracking.ll +++ b/llvm/test/Transforms/OpenMP/icv_tracking.ll @@ -30,6 +30,21 @@ define i32 @bad_use(i32 %0) { ret i32 %2 } +define void @indirect_call(void ()* %0) { +; CHECK-LABEL: define {{[^@]+}}@indirect_call +; CHECK-SAME: (void ()* [[TMP0:%.*]]) +; CHECK-NEXT: call void @omp_set_num_threads(i32 4) +; CHECK-NEXT: tail call void [[TMP0]]() +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @omp_get_max_threads() +; CHECK-NEXT: tail call void @use(i32 [[TMP2]]) +; CHECK-NEXT: ret void + call void @omp_set_num_threads(i32 4) + tail call void %0() + %2 = tail call i32 @omp_get_max_threads() + tail call void @use(i32 %2) + ret void +} + define dso_local i32 @foo(i32 %0, i32 %1) { ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) From baf3c77bd9f6baf60a09ef3625fef84080642b72 Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Wed, 2 Sep 2020 13:11:35 -0700 Subject: [PATCH 210/465] [libclang] Add translateCXRangeToCharRange conversion Add 
new conversion with clearly specified semantics. https://reviews.llvm.org/D86990 --- clang/tools/libclang/CIndex.cpp | 6 ++++++ clang/tools/libclang/CXSourceLocation.h | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 93f9797a965ec..683b517d79fda 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -164,6 +164,12 @@ CXSourceRange cxloc::translateSourceRange(const SourceManager &SM, return Result; } +CharSourceRange cxloc::translateCXRangeToCharRange(CXSourceRange R) { + return CharSourceRange::getCharRange( + SourceLocation::getFromRawEncoding(R.begin_int_data), + SourceLocation::getFromRawEncoding(R.end_int_data)); +} + //===----------------------------------------------------------------------===// // Cursor visitor. //===----------------------------------------------------------------------===// diff --git a/clang/tools/libclang/CXSourceLocation.h b/clang/tools/libclang/CXSourceLocation.h index 6702d0cf9791b..ce3d09e1c9eb8 100644 --- a/clang/tools/libclang/CXSourceLocation.h +++ b/clang/tools/libclang/CXSourceLocation.h @@ -71,7 +71,11 @@ static inline SourceRange translateCXSourceRange(CXSourceRange R) { SourceLocation::getFromRawEncoding(R.end_int_data)); } - +/// Translates CXSourceRange to CharSourceRange. +/// The semantics of \p R are: +/// R.begin_int_data is first character of the range. +/// R.end_int_data is one character past the end of the range. 
+CharSourceRange translateCXRangeToCharRange(CXSourceRange R); }} // end namespace: clang::cxloc #endif From f0b9dbcfc7ba2a217cab3217d6217fc270c88b58 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Tue, 21 Jul 2020 18:50:43 +0200 Subject: [PATCH 211/465] [analyzer][StdLibraryFunctionsChecker] Add POSIX time handling functions Differential Revision: https://reviews.llvm.org/D84248 --- .../Checkers/StdLibraryFunctionsChecker.cpp | 177 ++++++++++++++++-- .../Analysis/std-c-library-functions-POSIX.c | 32 ++++ .../std-c-library-functions-arg-constraints.c | 11 ++ 3 files changed, 207 insertions(+), 13 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index 2c20422a9cc48..ddde629f44a5c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -249,15 +249,21 @@ class StdLibraryFunctionsChecker } }; - // Represents a buffer argument with an additional size argument. - // E.g. the first two arguments here: + // Represents a buffer argument with an additional size constraint. The + // constraint may be a concrete value, or a symbolic value in an argument. + // Example 1. Concrete value as the minimum buffer size. + // char *asctime_r(const struct tm *restrict tm, char *restrict buf); + // // `buf` size must be at least 26 bytes according the POSIX standard. + // Example 2. Argument as a buffer size. // ctime_s(char *buffer, rsize_t bufsz, const time_t *time); - // Another example: + // Example 3. The size is computed as a multiplication of other args. // size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); // // Here, ptr is the buffer, and its minimum size is `size * nmemb`. class BufferSizeConstraint : public ValueConstraint { + // The concrete value which is the minimum size for the buffer. 
+ llvm::Optional ConcreteSize; // The argument which holds the size of the buffer. - ArgNo SizeArgN; + llvm::Optional SizeArgN; // The argument which is a multiplier to size. This is set in case of // `fread` like functions where the size is computed as a multiplication of // two arguments. @@ -266,9 +272,10 @@ class StdLibraryFunctionsChecker BinaryOperator::Opcode Op = BO_LE; public: + BufferSizeConstraint(ArgNo Buffer, llvm::APSInt BufMinSize) + : ValueConstraint(Buffer), ConcreteSize(BufMinSize) {} BufferSizeConstraint(ArgNo Buffer, ArgNo BufSize) : ValueConstraint(Buffer), SizeArgN(BufSize) {} - BufferSizeConstraint(ArgNo Buffer, ArgNo BufSize, ArgNo BufSizeMultiplier) : ValueConstraint(Buffer), SizeArgN(BufSize), SizeMultiplierArgN(BufSizeMultiplier) {} @@ -279,14 +286,27 @@ class StdLibraryFunctionsChecker SValBuilder &SvalBuilder = C.getSValBuilder(); // The buffer argument. SVal BufV = getArgSVal(Call, getArgNo()); - // The size argument. - SVal SizeV = getArgSVal(Call, SizeArgN); - // Multiply with another argument if given. - if (SizeMultiplierArgN) { - SVal SizeMulV = getArgSVal(Call, *SizeMultiplierArgN); - SizeV = SvalBuilder.evalBinOp(State, BO_Mul, SizeV, SizeMulV, - Summary.getArgType(SizeArgN)); - } + + // Get the size constraint. + const SVal SizeV = [this, &State, &Call, &Summary, &SvalBuilder]() { + if (ConcreteSize) { + return SVal(SvalBuilder.makeIntVal(*ConcreteSize)); + } else if (SizeArgN) { + // The size argument. + SVal SizeV = getArgSVal(Call, *SizeArgN); + // Multiply with another argument if given. + if (SizeMultiplierArgN) { + SVal SizeMulV = getArgSVal(Call, *SizeMultiplierArgN); + SizeV = SvalBuilder.evalBinOp(State, BO_Mul, SizeV, SizeMulV, + Summary.getArgType(*SizeArgN)); + } + return SizeV; + } else { + llvm_unreachable("The constraint must be either a concrete value or " + "encoded in an arguement."); + } + }(); + // The dynamic size of the buffer argument, got from the analyzer engine. 
SVal BufDynSize = getDynamicSizeWithOffset(State, BufV); @@ -2036,6 +2056,132 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( BufferSize(/*Buffer=*/ArgNo(4), /*BufSize=*/ArgNo(5))) .ArgConstraint( ArgumentCondition(5, WithinRange, Range(0, Socklen_tMax)))); + + Optional StructUtimbufTy = lookupTy("utimbuf"); + Optional StructUtimbufPtrTy = getPointerTy(StructUtimbufTy); + + // int utime(const char *filename, struct utimbuf *buf); + addToFunctionSummaryMap( + "utime", Summary(ArgTypes{ConstCharPtrTy, StructUtimbufPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0)))); + + Optional StructTimespecTy = lookupTy("timespec"); + Optional StructTimespecPtrTy = getPointerTy(StructTimespecTy); + Optional ConstStructTimespecPtrTy = + getPointerTy(getConstTy(StructTimespecTy)); + + // int futimens(int fd, const struct timespec times[2]); + addToFunctionSummaryMap( + "futimens", Summary(ArgTypes{IntTy, ConstStructTimespecPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(ArgumentCondition(0, WithinRange, + Range(0, IntMax)))); + + // int utimensat(int dirfd, const char *pathname, + // const struct timespec times[2], int flags); + addToFunctionSummaryMap("utimensat", + Summary(ArgTypes{IntTy, ConstCharPtrTy, + ConstStructTimespecPtrTy, IntTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(1)))); + + Optional StructTimevalTy = lookupTy("timeval"); + Optional ConstStructTimevalPtrTy = + getPointerTy(getConstTy(StructTimevalTy)); + + // int utimes(const char *filename, const struct timeval times[2]); + addToFunctionSummaryMap( + "utimes", Summary(ArgTypes{ConstCharPtrTy, ConstStructTimevalPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0)))); + + // int nanosleep(const struct timespec *rqtp, struct timespec *rmtp); + addToFunctionSummaryMap( + "nanosleep", + Summary(ArgTypes{ConstStructTimespecPtrTy, StructTimespecPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0)))); + + Optional Time_tTy = 
lookupTy("time_t"); + Optional ConstTime_tPtrTy = getPointerTy(getConstTy(Time_tTy)); + Optional ConstTime_tPtrRestrictTy = + getRestrictTy(ConstTime_tPtrTy); + + Optional StructTmTy = lookupTy("tm"); + Optional StructTmPtrTy = getPointerTy(StructTmTy); + Optional StructTmPtrRestrictTy = getRestrictTy(StructTmPtrTy); + Optional ConstStructTmPtrTy = + getPointerTy(getConstTy(StructTmTy)); + Optional ConstStructTmPtrRestrictTy = + getRestrictTy(ConstStructTmPtrTy); + + // struct tm * localtime(const time_t *tp); + addToFunctionSummaryMap( + "localtime", + Summary(ArgTypes{ConstTime_tPtrTy}, RetType{StructTmPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0)))); + + // struct tm *localtime_r(const time_t *restrict timer, + // struct tm *restrict result); + addToFunctionSummaryMap( + "localtime_r", + Summary(ArgTypes{ConstTime_tPtrRestrictTy, StructTmPtrRestrictTy}, + RetType{StructTmPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(1)))); + + // char *asctime_r(const struct tm *restrict tm, char *restrict buf); + addToFunctionSummaryMap( + "asctime_r", + Summary(ArgTypes{ConstStructTmPtrRestrictTy, CharPtrRestrictTy}, + RetType{CharPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(1))) + .ArgConstraint(BufferSize(/*Buffer=*/ArgNo(1), + /*MinBufSize=*/BVF.getValue(26, IntTy)))); + + // char *ctime_r(const time_t *timep, char *buf); + addToFunctionSummaryMap("ctime_r", + Summary(ArgTypes{ConstTime_tPtrTy, CharPtrTy}, + RetType{CharPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(1))) + .ArgConstraint(BufferSize( + /*Buffer=*/ArgNo(1), + /*MinBufSize=*/BVF.getValue(26, IntTy)))); + + // struct tm *gmtime_r(const time_t *restrict timer, + // struct tm *restrict result); + addToFunctionSummaryMap( + "gmtime_r", + Summary(ArgTypes{ConstTime_tPtrRestrictTy, StructTmPtrRestrictTy}, + RetType{StructTmPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + 
.ArgConstraint(NotNull(ArgNo(1)))); + + // struct tm * gmtime(const time_t *tp); + addToFunctionSummaryMap( + "gmtime", + Summary(ArgTypes{ConstTime_tPtrTy}, RetType{StructTmPtrTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0)))); + + Optional Clockid_tTy = lookupTy("clockid_t"); + + // int clock_gettime(clockid_t clock_id, struct timespec *tp); + addToFunctionSummaryMap("clock_gettime", + Summary(ArgTypes{Clockid_tTy, StructTimespecPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(1)))); + + Optional StructItimervalTy = lookupTy("itimerval"); + Optional StructItimervalPtrTy = getPointerTy(StructItimervalTy); + + // int getitimer(int which, struct itimerval *curr_value); + addToFunctionSummaryMap("getitimer", + Summary(ArgTypes{IntTy, StructItimervalPtrTy}, + RetType{IntTy}, NoEvalCall) + .ArgConstraint(NotNull(ArgNo(1)))); } // Functions for testing. @@ -2071,6 +2217,11 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( EvalCallAsPure) .ArgConstraint(BufferSize(/*Buffer=*/ArgNo(0), /*BufSize=*/ArgNo(1), /*BufSizeMultiplier=*/ArgNo(2)))); + addToFunctionSummaryMap( + "__buf_size_arg_constraint_concrete", + Summary(ArgTypes{ConstVoidPtrTy}, RetType{IntTy}, EvalCallAsPure) + .ArgConstraint(BufferSize(/*Buffer=*/ArgNo(0), + /*BufSize=*/BVF.getValue(10, IntTy)))); addToFunctionSummaryMap( {"__test_restrict_param_0", "__test_restrict_param_1", "__test_restrict_param_2"}, diff --git a/clang/test/Analysis/std-c-library-functions-POSIX.c b/clang/test/Analysis/std-c-library-functions-POSIX.c index 3638ad100240a..d65e9f029b6be 100644 --- a/clang/test/Analysis/std-c-library-functions-POSIX.c +++ b/clang/test/Analysis/std-c-library-functions-POSIX.c @@ -95,6 +95,19 @@ // CHECK: Loaded summary for: ssize_t send(int sockfd, const void *buf, size_t len, int flags) // CHECK: Loaded summary for: int socketpair(int domain, int type, int protocol, int sv[2]) // CHECK: Loaded summary for: int getnameinfo(const struct sockaddr *restrict sa, socklen_t salen, 
char *restrict node, socklen_t nodelen, char *restrict service, socklen_t servicelen, int flags) +// CHECK: Loaded summary for: int utime(const char *filename, struct utimbuf *buf) +// CHECK: Loaded summary for: int futimens(int fd, const struct timespec times[2]) +// CHECK: Loaded summary for: int utimensat(int dirfd, const char *pathname, const struct timespec times[2], int flags) +// CHECK: Loaded summary for: int utimes(const char *filename, const struct timeval times[2]) +// CHECK: Loaded summary for: int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) +// CHECK: Loaded summary for: struct tm *localtime(const time_t *tp) +// CHECK: Loaded summary for: struct tm *localtime_r(const time_t *restrict timer, struct tm *restrict result) +// CHECK: Loaded summary for: char *asctime_r(const struct tm *restrict tm, char *restrict buf) +// CHECK: Loaded summary for: char *ctime_r(const time_t *timep, char *buf) +// CHECK: Loaded summary for: struct tm *gmtime_r(const time_t *restrict timer, struct tm *restrict result) +// CHECK: Loaded summary for: struct tm *gmtime(const time_t *tp) +// CHECK: Loaded summary for: int clock_gettime(clockid_t clock_id, struct timespec *tp) +// CHECK: Loaded summary for: int getitimer(int which, struct itimerval *curr_value) long a64l(const char *str64); char *l64a(long value); @@ -226,6 +239,25 @@ int getsockopt(int socket, int level, int option_name, void *restrict option_val ssize_t send(int sockfd, const void *buf, size_t len, int flags); int socketpair(int domain, int type, int protocol, int sv[2]); int getnameinfo(const struct sockaddr *restrict sa, socklen_t salen, char *restrict node, socklen_t nodelen, char *restrict service, socklen_t servicelen, int flags); +struct utimbuf; +struct timespec { int x; }; +struct timeval { int x; }; +int utime(const char *filename, struct utimbuf *buf); +int futimens(int fd, const struct timespec times[2]); +int utimensat(int dirfd, const char *pathname, const struct timespec 
times[2], int flags); +int utimes(const char *filename, const struct timeval times[2]); +int nanosleep(const struct timespec *rqtp, struct timespec *rmtp); +typedef unsigned long time_t; +struct tm *localtime(const time_t *tp); +struct tm *localtime_r(const time_t *restrict timer, struct tm *restrict result); +char *asctime_r(const struct tm *restrict tm, char *restrict buf); +char *ctime_r(const time_t *timep, char *buf); +struct tm *gmtime_r(const time_t *restrict timer, struct tm *restrict result); +struct tm *gmtime(const time_t *tp); +typedef unsigned long clockid_t; +int clock_gettime(clockid_t clock_id, struct timespec *tp); +struct itimerval; +int getitimer(int which, struct itimerval *curr_value); // Must have at least one call expression to initialize the summary map. int bar(void); diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints.c b/clang/test/Analysis/std-c-library-functions-arg-constraints.c index e926cd15384d1..28979abd43b58 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-constraints.c +++ b/clang/test/Analysis/std-c-library-functions-arg-constraints.c @@ -256,6 +256,7 @@ void test_buf_size_symbolic_and_offset(int s) { // bugpath-note{{TRUE}} \ // bugpath-note{{'s' is <= 2}} } + int __buf_size_arg_constraint_mul(const void *, size_t, size_t); void test_buf_size_concrete_with_multiplication() { short buf[3]; // bugpath-note{{'buf' initialized here}} @@ -280,3 +281,13 @@ void test_buf_size_symbolic_and_offset_with_multiplication(size_t s) { // bugpath-warning{{TRUE}} \ // bugpath-note{{TRUE}} } + +// The minimum buffer size for this function is set to 10. 
+int __buf_size_arg_constraint_concrete(const void *); +void test_min_buf_size() { + char buf[9];// bugpath-note{{'buf' initialized here}} + __buf_size_arg_constraint_concrete(buf); // \ + // report-warning{{Function argument constraint is not satisfied}} \ + // bugpath-warning{{Function argument constraint is not satisfied}} \ + // bugpath-note{{Function argument constraint is not satisfied}} +} From 51932fc6bde88d1798a6cdea1f3885164d5524d7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Sep 2020 17:49:33 +0100 Subject: [PATCH 212/465] [DSE,MemorySSA] Remove some duplicated test functions. Some tests from multibuild-malloc-free.ll do not actually use malloc or free and where split out to multiblock-throwing.ll, but not removed from the original file. This patch cleans that up. It also moves @test22 to simple.ll, because it does not involve multiple blocks. --- .../MSSA/multiblock-malloc-free.ll | 90 ------------------- .../MSSA/multiblock-throwing.ll | 15 ---- .../DeadStoreElimination/MSSA/simple.ll | 14 +++ 3 files changed, 14 insertions(+), 105 deletions(-) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll index 04cdae285d814..46712ccc92ff9 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -84,31 +84,6 @@ bb3: ret void } - -define void @test6(i32* noalias %P) { -; CHECK-LABEL: @test6( -; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P:%.*]] -; CHECK-NEXT: ret void -; - store i32 0, i32* %P - br i1 true, label %bb1, label %bb2 -bb1: - br label %bb3 -bb2: - call void @unknown_func() - br label %bb3 -bb3: - store i32 0, i32* %P - 
ret void -} - define void @test19(i32* noalias %P) { ; CHECK-LABEL: @test19( ; CHECK-NEXT: entry: @@ -173,71 +148,6 @@ bb3: ret void } - -define i32 @test22(i32* %P, i32* noalias %Q, i32* %R) { -; CHECK-LABEL: @test22( -; CHECK-NEXT: store i32 2, i32* [[P:%.*]] -; CHECK-NEXT: store i32 3, i32* [[Q:%.*]] -; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[R:%.*]] -; CHECK-NEXT: ret i32 [[L]] -; - store i32 1, i32* %Q - store i32 2, i32* %P - store i32 3, i32* %Q - %l = load i32, i32* %R - ret i32 %l -} - - -define void @test23(i32* noalias %P) { -; CHECK-LABEL: @test23( -; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P:%.*]] -; CHECK-NEXT: ret void -; - br i1 true, label %bb1, label %bb2 -bb1: - store i32 0, i32* %P - br label %bb3 -bb2: - call void @unknown_func() - br label %bb3 -bb3: - store i32 0, i32* %P - ret void -} - - -define void @test24(i32* noalias %P) { -; CHECK-LABEL: @test24( -; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB1:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P:%.*]] -; CHECK-NEXT: ret void -; - br i1 true, label %bb2, label %bb1 -bb1: - store i32 0, i32* %P - br label %bb3 -bb2: - call void @unknown_func() - br label %bb3 -bb3: - store i32 0, i32* %P - ret void -} - define i8* @test26() { ; CHECK-LABEL: @test26( ; CHECK-NEXT: bb1: diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll index 93a9a2d999e1a..d297ac3f44f8a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll @@ -30,21 +30,6 @@ 
bb3: ret void } -define i32 @test22(i32* %P, i32* noalias %Q, i32* %R) { -; CHECK-LABEL: @test22( -; CHECK-NEXT: store i32 2, i32* [[P:%.*]] -; CHECK-NEXT: store i32 3, i32* [[Q:%.*]] -; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[R:%.*]] -; CHECK-NEXT: ret i32 [[L]] -; - store i32 1, i32* %Q - store i32 2, i32* %P - store i32 3, i32* %Q - %l = load i32, i32* %R - ret i32 %l -} - - define void @test23(i32* noalias %P) { ; CHECK-LABEL: @test23( ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll index 5c04e11b4a78e..1ac6ad7d81586 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -749,3 +749,17 @@ define void @test47_volatile(i32* %P) { store volatile i32 3, i32* %P, align 4 ret void } + +define i32 @test48(i32* %P, i32* noalias %Q, i32* %R) { +; CHECK-LABEL: @test48( +; CHECK-NEXT: store i32 2, i32* [[P:%.*]], align 4 +; CHECK-NEXT: store i32 3, i32* [[Q:%.*]], align 4 +; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[R:%.*]], align 4 +; CHECK-NEXT: ret i32 [[L]] +; + store i32 1, i32* %Q + store i32 2, i32* %P + store i32 3, i32* %Q + %l = load i32, i32* %R + ret i32 %l +} From 3f1a9b7eca0a969e18aabefa3ceb9054b94c17c0 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Thu, 3 Sep 2020 17:07:59 -0700 Subject: [PATCH 213/465] [objdump][macho] Emit segment names along with section names I recently came across a MachO with multiple sections of the same name but different segments. We should emit the segment name alongside the section name for MachO's. 
Differential Revision: https://reviews.llvm.org/D87119 --- .../test/MC/AArch64/arm64_32-compact-unwind.s | 2 +- .../llvm-objdump/MachO/section-contents.test | 8 ++--- llvm/tools/llvm-objdump/llvm-objdump.cpp | 31 ++++++++++++------- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/llvm/test/MC/AArch64/arm64_32-compact-unwind.s b/llvm/test/MC/AArch64/arm64_32-compact-unwind.s index 59d882ae3a5c0..d221640a07478 100644 --- a/llvm/test/MC/AArch64/arm64_32-compact-unwind.s +++ b/llvm/test/MC/AArch64/arm64_32-compact-unwind.s @@ -4,7 +4,7 @@ ; The compact unwind format in ILP32 mode is pretty much the same, except ; references to addresses (function, personality, LSDA) are pointer-sized. -; CHECK: Contents of section __compact_unwind: +; CHECK: Contents of section __LD,__compact_unwind: ; CHECK-NEXT: 0004 00000000 04000000 00000002 00000000 ; CHECK-NEXT: 0014 00000000 .globl _test_compact_unwind diff --git a/llvm/test/tools/llvm-objdump/MachO/section-contents.test b/llvm/test/tools/llvm-objdump/MachO/section-contents.test index cd68e1fa550b4..d938e903fd079 100644 --- a/llvm/test/tools/llvm-objdump/MachO/section-contents.test +++ b/llvm/test/tools/llvm-objdump/MachO/section-contents.test @@ -1,16 +1,16 @@ RUN: llvm-objdump --macho -s %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s -CHECK: Contents of section __text: +CHECK: Contents of section __TEXT,__text: CHECK: 0000 554889e5 4883ec20 488d0500 000000c7 UH..H.. H....... CHECK: 0010 45fc0000 0000897d f8488975 f0488955 E......}.H.u.H.U CHECK: 0020 e84889c7 b000e800 000000b9 00000000 .H.............. CHECK: 0030 8945e489 c84883c4 205dc3 .E...H.. ]. -CHECK: Contents of section __cstring: +CHECK: Contents of section __TEXT,__cstring: CHECK: 003b 48656c6c 6f20776f 726c640a 00 Hello world.. -CHECK: Contents of section __compact_unwind: +CHECK: Contents of section __LD,__compact_unwind: CHECK: 0048 00000000 00000000 3b000000 00000001 ........;....... 
CHECK: 0058 00000000 00000000 00000000 00000000 ................ -CHECK: Contents of section __eh_frame: +CHECK: Contents of section __TEXT,__eh_frame: CHECK: 0068 14000000 00000000 017a5200 01781001 .........zR..x.. CHECK: 0078 100c0708 90010000 24000000 1c000000 ........$....... CHECK: 0088 78ffffff ffffffff 3b000000 00000000 x.......;....... diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 6b3ecd9cef193..b63d08b90ff51 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1619,6 +1619,16 @@ collectLocalBranchTargets(ArrayRef Bytes, const MCInstrAnalysis *MIA, } } +static StringRef getSegmentName(const MachOObjectFile *MachO, + const SectionRef &Section) { + if (MachO) { + DataRefImpl DR = Section.getRawDataRefImpl(); + StringRef SegmentName = MachO->getSectionFinalSegmentName(DR); + return SegmentName; + } + return ""; +} + static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, @@ -1783,12 +1793,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } } - StringRef SegmentName = ""; - if (MachO) { - DataRefImpl DR = Section.getRawDataRefImpl(); - SegmentName = MachO->getSectionFinalSegmentName(DR); - } - + StringRef SegmentName = getSegmentName(MachO, Section); StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName()); // If the section has no symbol at the start, just insert a dummy one. 
if (Symbols.empty() || Symbols[0].Addr != 0) { @@ -2388,6 +2393,8 @@ void objdump::printSectionHeaders(const ObjectFile *Obj) { } void objdump::printSectionContents(const ObjectFile *Obj) { + const MachOObjectFile *MachO = dyn_cast(Obj); + for (const SectionRef &Section : ToolSectionFilter(*Obj)) { StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); uint64_t BaseAddr = Section.getAddress(); @@ -2395,7 +2402,11 @@ void objdump::printSectionContents(const ObjectFile *Obj) { if (!Size) continue; - outs() << "Contents of section " << Name << ":\n"; + outs() << "Contents of section "; + StringRef SegmentName = getSegmentName(MachO, Section); + if (!SegmentName.empty()) + outs() << SegmentName << ","; + outs() << Name << ":\n"; if (Section.isBSS()) { outs() << format("\n", @@ -2553,11 +2564,9 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, } else if (Section == O->section_end()) { outs() << "*UND*"; } else { - if (MachO) { - DataRefImpl DR = Section->getRawDataRefImpl(); - StringRef SegmentName = MachO->getSectionFinalSegmentName(DR); + StringRef SegmentName = getSegmentName(MachO, *Section); + if (!SegmentName.empty()) outs() << SegmentName << ","; - } StringRef SectionName = unwrapOrError(Section->getName(), FileName); outs() << SectionName; } From dde16ef031a0a9465444b0b02b9427709ef70651 Mon Sep 17 00:00:00 2001 From: Hubert Tong Date: Fri, 4 Sep 2020 13:07:44 -0400 Subject: [PATCH 214/465] [tests][libFuzzer] Fix `-Wmissing-field-initializers` after D86092 Speculatively fix `-Werror,-Wmissing-field-initializers` failures relating to the `ScalePerExecTime` field added by D86092. 
--- compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp index e17fca8fe9ed6..d2b5cbb7d57d0 100644 --- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -592,7 +592,7 @@ TEST(FuzzerUtil, Base64) { TEST(Corpus, Distribution) { DataFlowTrace DFT; Random Rand(0); - struct EntropicOptions Entropic = {false, 0xFF, 100}; + struct EntropicOptions Entropic = {false, 0xFF, 100, false}; std::unique_ptr C(new InputCorpus("", Entropic)); size_t N = 10; size_t TriesPerUnit = 1<<16; @@ -1060,7 +1060,7 @@ TEST(Entropic, UpdateFrequency) { const size_t FeatIdx1 = 0, FeatIdx2 = 42, FeatIdx3 = 12, FeatIdx4 = 26; size_t Index; // Create input corpus with default entropic configuration - struct EntropicOptions Entropic = {true, 0xFF, 100}; + struct EntropicOptions Entropic = {true, 0xFF, 100, false}; std::unique_ptr C(new InputCorpus("", Entropic)); std::unique_ptr II(new InputInfo()); @@ -1097,7 +1097,7 @@ double SubAndSquare(double X, double Y) { TEST(Entropic, ComputeEnergy) { const double Precision = 0.01; - struct EntropicOptions Entropic = {true, 0xFF, 100}; + struct EntropicOptions Entropic = {true, 0xFF, 100, false}; std::unique_ptr C(new InputCorpus("", Entropic)); std::unique_ptr II(new InputInfo()); Vector> FeatureFreqs = {{1, 3}, {2, 3}, {3, 3}}; From 8760048384f545ae6f1d861cdbc95b1c76f41d9f Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 4 Sep 2020 10:26:09 -0700 Subject: [PATCH 215/465] [lld] Test corrections after 3f1a9b7eca0 added segment names to objdump output --- lld/test/MachO/local-got.s | 4 ++-- lld/test/MachO/relocations.s | 2 +- lld/test/MachO/sectcreate.s | 8 ++++---- lld/test/MachO/section-merge.s | 2 +- lld/test/MachO/weak-binding.s | 4 ++-- lld/test/MachO/x86-64-reloc-unsigned.s | 4 ++-- 
lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml | 2 +- lld/test/mach-o/eh-frame-relocs-arm64.yaml | 4 ++-- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lld/test/MachO/local-got.s b/lld/test/MachO/local-got.s index 6acd1ca4a72da..bed04eecd4a9f 100644 --- a/lld/test/MachO/local-got.s +++ b/lld/test/MachO/local-got.s @@ -11,12 +11,12 @@ ## Check that the GOT references the cstrings. --full-contents displays the ## address offset and the contents at that address very similarly, so am using ## --match-full-lines to make sure we match on the right thing. -# CHECK: Contents of section __cstring: +# CHECK: Contents of section __TEXT,__cstring: # CHECK-NEXT: 10000040c {{.*}} ## 1st 8 bytes refer to the start of __cstring + 0xe, 2nd 8 bytes refer to the ## start of __cstring -# CHECK: Contents of section __got: +# CHECK: Contents of section __DATA_CONST,__got: # CHECK-NEXT: [[#%X,ADDR:]] 1a040000 01000000 0c040000 01000000 {{.*}} # CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}} diff --git a/lld/test/MachO/relocations.s b/lld/test/MachO/relocations.s index 006df404242b7..e6f7778413d5b 100644 --- a/lld/test/MachO/relocations.s +++ b/lld/test/MachO/relocations.s @@ -20,7 +20,7 @@ # CHECK-NEXT: [[#%x, CSTRING_ADDR + 22 - LSTR_OFF]] # RUN: llvm-objdump --section=__const --full-contents %t | FileCheck %s --check-prefix=NONPCREL -# NONPCREL: Contents of section __const: +# NONPCREL: Contents of section __DATA,__const: # NONPCREL-NEXT: 100001000 f0030000 01000000 f0030000 01000000 .section __TEXT,__text diff --git a/lld/test/MachO/sectcreate.s b/lld/test/MachO/sectcreate.s index ac561d88110ba..482e3e5572dc9 100644 --- a/lld/test/MachO/sectcreate.s +++ b/lld/test/MachO/sectcreate.s @@ -10,13 +10,13 @@ # RUN: -o %t %t.o # RUN: llvm-objdump -s %t | FileCheck %s -# CHECK: Contents of section __text: -# CHECK: Contents of section __data: +# CHECK: Contents of section __TEXT,__text: +# CHECK: Contents of section __DATA,__data: # CHECK: my string!. 
-# CHECK: Contents of section SEC1: +# CHECK: Contents of section SEG,SEC1: # CHECK: -sectcreate 1.1. # CHECK: -sectcreate 1.2. -# CHECK: Contents of section SEC2: +# CHECK: Contents of section SEG,SEC2: # CHECK: -sectcreate 2. .text diff --git a/lld/test/MachO/section-merge.s b/lld/test/MachO/section-merge.s index 69c55a047b490..34c19d769b945 100644 --- a/lld/test/MachO/section-merge.s +++ b/lld/test/MachO/section-merge.s @@ -15,7 +15,7 @@ # CHECK-DAG: {{0*}}[[#ADDR+0x8]] g O __DATA,__data _baz # CHECK-DAG: {{0*}}[[#ADDR+0x9]] g O __DATA,__data _qux -# CHECK: Contents of section __data: +# CHECK: Contents of section __DATA,__data: # CHECK-NEXT: {{0*}}[[#ADDR]] ca000000 fe000000 baefbead de000000 .section __TEXT,__text diff --git a/lld/test/MachO/weak-binding.s b/lld/test/MachO/weak-binding.s index 3474d35ce921b..fc4106bf953b6 100644 --- a/lld/test/MachO/weak-binding.s +++ b/lld/test/MachO/weak-binding.s @@ -7,13 +7,13 @@ # RUN: llvm-objdump -d --no-show-raw-insn --bind --lazy-bind --weak-bind --full-contents %t/test | \ # RUN: FileCheck %s -# CHECK: Contents of section __la_symbol_ptr: +# CHECK: Contents of section __DATA,__la_symbol_ptr: ## Check that this section contains a nonzero pointer. It should point to ## _weak_external_fn, but we don't have a good way of testing the exact value as ## the bytes here are in little-endian order. # CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}} -# CHECK: Contents of section __got: +# CHECK: Contents of section __DATA_CONST,__got: ## Check that this section contains a nonzero pointer. It should point to ## _weak_external_for_gotpcrel. 
# CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}} diff --git a/lld/test/MachO/x86-64-reloc-unsigned.s b/lld/test/MachO/x86-64-reloc-unsigned.s index 52a3d536139c9..211a64b9ad3d2 100644 --- a/lld/test/MachO/x86-64-reloc-unsigned.s +++ b/lld/test/MachO/x86-64-reloc-unsigned.s @@ -2,9 +2,9 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: lld -flavor darwinnew -o %t %t.o # RUN: llvm-objdump --full-contents %t | FileCheck %s -# CHECK: Contents of section foo: +# CHECK: Contents of section __DATA,foo: # CHECK: 100001000 08100000 01000000 -# CHECK: Contents of section bar: +# CHECK: Contents of section __DATA,bar: # CHECK: 100001008 011000f0 11211111 02000000 .globl _main, _foo, _bar diff --git a/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml b/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml index 25ba88307fc18..5ad2815b9cd90 100644 --- a/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml +++ b/lld/test/mach-o/do-not-emit-unwind-fde-arm64.yaml @@ -194,7 +194,7 @@ page-size: 0x00000000 # Also make sure the reloc for the FDE->function is the correct offset # It should be the offset from the fixup location back to the address # of the function we are referencing -# CODE: Contents of section __eh_frame: +# CODE: Contents of section __TEXT,__eh_frame: # This is the CIE: # CODE-NEXT: {{[0-9abcdef]*}} 1c000000 00000000 017a504c 52000178 # CODE-NEXT: {{[0-9abcdef]*}} 1e0700bd ffffffff ffffff00 100c1f00 diff --git a/lld/test/mach-o/eh-frame-relocs-arm64.yaml b/lld/test/mach-o/eh-frame-relocs-arm64.yaml index f8d538d7c109f..e669aaf8109b5 100644 --- a/lld/test/mach-o/eh-frame-relocs-arm64.yaml +++ b/lld/test/mach-o/eh-frame-relocs-arm64.yaml @@ -303,7 +303,7 @@ page-size: 0x00000000 # correct offset # It should be the offset from the fixup location back to the address # of the function we are referencing -# CODE: Contents of section __eh_frame: +# CODE: Contents of section __TEXT,__eh_frame: # This is the CIE: # CODE-NEXT: 
{{[0-9abcdef]*}} 18000000 00000000 037a504c 52000178 # CODE-NEXT: {{[0-9abcdef]*}} 1e079bd1 ffffff10 100c1f00 28000000 @@ -315,4 +315,4 @@ page-size: 0x00000000 # And a new CIE starts at this 00000018 right below here # CODE-NEXT: {{[0-9abcdef]*}} 019d0200 00000000 18000000 00000000 # CODE-NEXT: {{[0-9abcdef]*}} 037a504c 52000178 1e079b8d ffffff10 -# This is the important offset for its CIE->pfunc ^~~~~~~~~ \ No newline at end of file +# This is the important offset for its CIE->pfunc ^~~~~~~~~ From 64bb582f4a07d7195a6e6a44a34d166a06f0f071 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 4 Sep 2020 10:26:05 -0700 Subject: [PATCH 216/465] Fix the type of the invoke function in the block ABI documentation rdar://problem/67892794 --- clang/docs/Block-ABI-Apple.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/docs/Block-ABI-Apple.rst b/clang/docs/Block-ABI-Apple.rst index d038cdfe9bd20..e21a8b68b5cd1 100644 --- a/clang/docs/Block-ABI-Apple.rst +++ b/clang/docs/Block-ABI-Apple.rst @@ -35,7 +35,8 @@ High Level ========== The ABI of ``Blocks`` consist of their layout and the runtime functions required -by the compiler. A ``Block`` consists of a structure of the following form: +by the compiler. A ``Block`` of type ``R (^)(P...)`` consists of a structure of +the following form: .. code-block:: c @@ -43,7 +44,7 @@ by the compiler. A ``Block`` consists of a structure of the following form: void *isa; // initialized to &_NSConcreteStackBlock or &_NSConcreteGlobalBlock int flags; int reserved; - void (*invoke)(void *, ...); + R (*invoke)(struct Block_literal_1 *, P...); struct Block_descriptor_1 { unsigned long int reserved; // NULL unsigned long int size; // sizeof(struct Block_literal_1) From 00eb6fef0809ed143975ad16f3988a4fee13261b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Sep 2020 17:19:56 +0100 Subject: [PATCH 217/465] [DSE,MemorySSA] Check for throwing instrs between killing/killed def. 
We also have to check all uses between the killing & killed def and check if any of them is throwing. --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 8 ++++++++ .../DeadStoreElimination/MSSA/multiblock-malloc-free.ll | 4 +++- .../DeadStoreElimination/MSSA/multiblock-throwing.ll | 1 - 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 7e4ef1ed9cd84..0296d20bc07b9 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -2023,6 +2023,14 @@ struct DSEState { if (isMemTerminator(DefLoc, UseInst)) continue; + if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) { + LLVM_DEBUG(dbgs() << " ... found throwing instruction\n"); + Cache.KnownReads.insert(UseAccess); + Cache.KnownReads.insert(StartAccess); + Cache.KnownReads.insert(EarlierAccess); + return None; + } + // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. if (isReadClobber(DefLoc, UseInst)) { diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll index 46712ccc92ff9..763362dd3d479 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -37,10 +37,12 @@ bb3: ret void } - +; We cannot remove the store in the entry block, because @unknown_func could +; unwind and the stored value could be read by the caller. 
define void @test17(i32* noalias %P) { ; CHECK-LABEL: @test17( ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 1, i32* [[P]], align 4 ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB3:%.*]] ; CHECK: bb1: ; CHECK-NEXT: call void @unknown_func() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll index d297ac3f44f8a..f6031e86bef07 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; XFAIL: * ; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" From 54205f0bd2377503b818d7f62cc4ed63ef5b1e94 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Fri, 4 Sep 2020 13:54:21 -0400 Subject: [PATCH 218/465] [PowerPC] Allow const pointers for load builtins in altivec.h The load builtins in altivec.h do not have const in the signature for the pointer parameter. This prevents using them for loading from constant pointers. A notable case for such a use is Eigen. This patch simply adds the missing const. 
Fixes: https://bugs.llvm.org/show_bug.cgi?id=47408 --- clang/lib/Headers/altivec.h | 90 +++++------ clang/test/CodeGen/builtins-ppc-altivec.c | 171 ++++++++++---------- clang/test/CodeGen/builtins-ppc-p10vector.c | 14 +- clang/test/CodeGen/builtins-ppc-xl-xst.c | 165 +++++++++++-------- 4 files changed, 238 insertions(+), 202 deletions(-) diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 47119d7026838..9fda383074f6b 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -2702,67 +2702,67 @@ vec_insert_exp(vector unsigned int __a, vector unsigned int __b) { } #if defined(__powerpc64__) -static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(signed char *__a, +static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(const signed char *__a, size_t __b) { return (vector signed char)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned char __ATTRS_o_ai -vec_xl_len(unsigned char *__a, size_t __b) { +vec_xl_len(const unsigned char *__a, size_t __b) { return (vector unsigned char)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(signed short *__a, +static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(const signed short *__a, size_t __b) { return (vector signed short)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned short __ATTRS_o_ai -vec_xl_len(unsigned short *__a, size_t __b) { +vec_xl_len(const unsigned short *__a, size_t __b) { return (vector unsigned short)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(signed int *__a, +static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(const signed int *__a, size_t __b) { return (vector signed int)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(unsigned int *__a, +static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(const unsigned int *__a, 
size_t __b) { return (vector unsigned int)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector float __ATTRS_o_ai vec_xl_len(float *__a, size_t __b) { +static __inline__ vector float __ATTRS_o_ai vec_xl_len(const float *__a, size_t __b) { return (vector float)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector signed __int128 __ATTRS_o_ai -vec_xl_len(signed __int128 *__a, size_t __b) { +vec_xl_len(const signed __int128 *__a, size_t __b) { return (vector signed __int128)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_len(unsigned __int128 *__a, size_t __b) { +vec_xl_len(const unsigned __int128 *__a, size_t __b) { return (vector unsigned __int128)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector signed long long __ATTRS_o_ai -vec_xl_len(signed long long *__a, size_t __b) { +vec_xl_len(const signed long long *__a, size_t __b) { return (vector signed long long)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned long long __ATTRS_o_ai -vec_xl_len(unsigned long long *__a, size_t __b) { +vec_xl_len(const unsigned long long *__a, size_t __b) { return (vector unsigned long long)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector double __ATTRS_o_ai vec_xl_len(double *__a, +static __inline__ vector double __ATTRS_o_ai vec_xl_len(const double *__a, size_t __b) { return (vector double)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned char __ATTRS_o_ai -vec_xl_len_r(unsigned char *__a, size_t __b) { +vec_xl_len_r(const unsigned char *__a, size_t __b) { vector unsigned char __res = (vector unsigned char)__builtin_vsx_lxvll(__a, (__b << 56)); #ifdef __LITTLE_ENDIAN__ @@ -16447,41 +16447,41 @@ typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1))); typedef vector float unaligned_vec_float __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed char vec_xl(signed long long __offset, - signed 
char *__ptr) { + const signed char *__ptr) { return *(unaligned_vec_schar *)(__ptr + __offset); } static inline __ATTRS_o_ai vector unsigned char -vec_xl(signed long long __offset, unsigned char *__ptr) { +vec_xl(signed long long __offset, const unsigned char *__ptr) { return *(unaligned_vec_uchar*)(__ptr + __offset); } static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset, - signed short *__ptr) { + const signed short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sshort *)__addr; } static inline __ATTRS_o_ai vector unsigned short -vec_xl(signed long long __offset, unsigned short *__ptr) { +vec_xl(signed long long __offset, const unsigned short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ushort *)__addr; } static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset, - signed int *__ptr) { + const signed int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sint *)__addr; } static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset, - unsigned int *__ptr) { + const unsigned int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_uint *)__addr; } static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset, - float *__ptr) { + const float *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_float *)__addr; } @@ -16492,19 +16492,19 @@ typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1))); typedef vector double unaligned_vec_double __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed long long -vec_xl(signed long long __offset, signed long long *__ptr) { +vec_xl(signed long long __offset, const signed long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sll *)__addr; } static inline __ATTRS_o_ai vector unsigned long 
long -vec_xl(signed long long __offset, unsigned long long *__ptr) { +vec_xl(signed long long __offset, const unsigned long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ull *)__addr; } static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset, - double *__ptr) { + const double *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_double *)__addr; } @@ -16515,13 +16515,13 @@ typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1))); typedef vector unsigned __int128 unaligned_vec_ui128 __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed __int128 -vec_xl(signed long long __offset, signed __int128 *__ptr) { +vec_xl(signed long long __offset, const signed __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_si128 *)__addr; } static inline __ATTRS_o_ai vector unsigned __int128 -vec_xl(signed long long __offset, unsigned __int128 *__ptr) { +vec_xl(signed long long __offset, const unsigned __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ui128 *)__addr; } @@ -16531,71 +16531,71 @@ vec_xl(signed long long __offset, unsigned __int128 *__ptr) { #ifdef __LITTLE_ENDIAN__ static __inline__ vector signed char __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed char *__ptr) { +vec_xl_be(signed long long __offset, const signed char *__ptr) { vector signed char __vec = (vector signed char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector unsigned char __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned char *__ptr) { +vec_xl_be(signed long long __offset, const unsigned char *__ptr) { vector unsigned char __vec = (vector unsigned char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 
4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector signed short __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed short *__ptr) { +vec_xl_be(signed long long __offset, const signed short *__ptr) { vector signed short __vec = (vector signed short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector unsigned short __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned short *__ptr) { +vec_xl_be(signed long long __offset, const unsigned short *__ptr) { vector unsigned short __vec = (vector unsigned short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector signed int __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed int *__ptr) { +vec_xl_be(signed long long __offset, const signed int *__ptr) { return (vector signed int)__builtin_vsx_lxvw4x_be(__offset, __ptr); } static __inline__ vector unsigned int __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned int *__ptr) { +vec_xl_be(signed long long __offset, const unsigned int *__ptr) { return (vector unsigned int)__builtin_vsx_lxvw4x_be(__offset, __ptr); } static __inline__ vector float __ATTRS_o_ai -vec_xl_be(signed long long __offset, float *__ptr) { +vec_xl_be(signed long long __offset, const float *__ptr) { return (vector float)__builtin_vsx_lxvw4x_be(__offset, __ptr); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed long long *__ptr) { +vec_xl_be(signed long long __offset, const signed long long *__ptr) { return (vector signed long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); } static __inline__ vector unsigned long long __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned long long *__ptr) { +vec_xl_be(signed long long __offset, const unsigned long long *__ptr) { return (vector unsigned long 
long)__builtin_vsx_lxvd2x_be(__offset, __ptr); } static __inline__ vector double __ATTRS_o_ai -vec_xl_be(signed long long __offset, double *__ptr) { +vec_xl_be(signed long long __offset, const double *__ptr) { return (vector double)__builtin_vsx_lxvd2x_be(__offset, __ptr); } #endif #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) static __inline__ vector signed __int128 __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed __int128 *__ptr) { +vec_xl_be(signed long long __offset, const signed __int128 *__ptr) { return vec_xl(__offset, __ptr); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) { +vec_xl_be(signed long long __offset, const unsigned __int128 *__ptr) { return vec_xl(__offset, __ptr); } #endif @@ -16608,44 +16608,44 @@ vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) { /* vect_xl_sext */ static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_sext(signed long long __offset, signed char *__pointer) { +vec_xl_sext(signed long long __offset, const signed char *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_sext(signed long long __offset, signed short *__pointer) { +vec_xl_sext(signed long long __offset, const signed short *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_sext(signed long long __offset, signed int *__pointer) { +vec_xl_sext(signed long long __offset, const signed int *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_sext(signed long long __offset, signed long long *__pointer) { +vec_xl_sext(signed long long __offset, const signed long long *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } /* vec_xl_zext */ static __inline__ vector unsigned __int128 
__ATTRS_o_ai -vec_xl_zext(signed long long __offset, unsigned char *__pointer) { +vec_xl_zext(signed long long __offset, const unsigned char *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_zext(signed long long __offset, unsigned short *__pointer) { +vec_xl_zext(signed long long __offset, const unsigned short *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_zext(signed long long __offset, unsigned int *__pointer) { +vec_xl_zext(signed long long __offset, const unsigned int *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_zext(signed long long __offset, unsigned long long *__pointer) { +vec_xl_zext(signed long long __offset, const unsigned long long *__pointer) { return (vector unsigned __int128)*(__pointer + __offset); } diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/builtins-ppc-altivec.c index d53011b37d413..1212f594ee8cb 100644 --- a/clang/test/CodeGen/builtins-ppc-altivec.c +++ b/clang/test/CodeGen/builtins-ppc-altivec.c @@ -38,6 +38,13 @@ vector float res_vf; // CHECK-NOALTIVEC: error: unknown type name 'vector' // CHECK-NOALTIVEC-NOT: '(error)' +const signed char *param_sc_ld; +const unsigned char *param_uc_ld; +const short *param_s_ld; +const unsigned short *param_us_ld; +const int *param_i_ld; +const unsigned int *param_ui_ld; +const float *param_f_ld; signed char param_sc; unsigned char param_uc; @@ -1313,7 +1320,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vsc = vec_ld(0, &param_sc); + res_vsc = vec_ld(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1321,7 +1328,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vuc = vec_ld(0, &param_uc); +
res_vuc = vec_ld(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1333,7 +1340,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vs = vec_ld(0, &param_s); + res_vs = vec_ld(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1341,7 +1348,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vus = vec_ld(0, &param_us); + res_vus = vec_ld(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1357,7 +1364,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vi = vec_ld(0, &param_i); + res_vi = vec_ld(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1365,7 +1372,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vui = vec_ld(0, &param_ui); + res_vui = vec_ld(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1377,7 +1384,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vf = vec_ld(0, &param_f); + res_vf = vec_ld(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1385,7 +1392,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vsc = vec_lvx(0, &param_sc); + res_vsc = vec_lvx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1393,7 +1400,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vuc = vec_lvx(0, &param_uc); + res_vuc = vec_lvx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1405,7 +1412,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vs = vec_lvx(0, &param_s); + res_vs = vec_lvx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1413,7
+1420,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vus = vec_lvx(0, &param_us); + res_vus = vec_lvx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1429,7 +1436,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vi = vec_lvx(0, &param_i); + res_vi = vec_lvx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1437,7 +1444,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vui = vec_lvx(0, &param_ui); + res_vui = vec_lvx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx @@ -1449,64 +1456,64 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx - res_vf = vec_lvx(0, &param_f); + res_vf = vec_lvx(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK-LE: @llvm.ppc.altivec.lvx /* vec_lde */ - res_vsc = vec_lde(0, &param_sc); + res_vsc = vec_lde(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vuc = vec_lde(0, &param_uc); + res_vuc = vec_lde(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vs = vec_lde(0, &param_s); + res_vs = vec_lde(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vus = vec_lde(0, &param_us); + res_vus = vec_lde(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vi = vec_lde(0, &param_i); + res_vi = vec_lde(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vui = vec_lde(0, &param_ui); + res_vui = vec_lde(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vf = vec_lde(0, &param_f); + res_vf = vec_lde(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vsc = vec_lvebx(0, &param_sc); + res_vsc = vec_lvebx(0, param_sc_ld); // CHECK:
@llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vuc = vec_lvebx(0, &param_uc); + res_vuc = vec_lvebx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvebx // CHECK-LE: @llvm.ppc.altivec.lvebx - res_vs = vec_lvehx(0, &param_s); + res_vs = vec_lvehx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vus = vec_lvehx(0, &param_us); + res_vus = vec_lvehx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvehx // CHECK-LE: @llvm.ppc.altivec.lvehx - res_vi = vec_lvewx(0, &param_i); + res_vi = vec_lvewx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vui = vec_lvewx(0, &param_ui); + res_vui = vec_lvewx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx - res_vf = vec_lvewx(0, &param_f); + res_vf = vec_lvewx(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvewx // CHECK-LE: @llvm.ppc.altivec.lvewx @@ -1515,7 +1522,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vsc = vec_ldl(0, &param_sc); + res_vsc = vec_ldl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1523,7 +1530,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vuc = vec_ldl(0, &param_uc); + res_vuc = vec_ldl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1535,7 +1542,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vs = vec_ldl(0, &param_s); + res_vs = vec_ldl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1543,7 +1550,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vus = vec_ldl(0, &param_us); + res_vus = vec_ldl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1559,7 +1566,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vi =
vec_ldl(0, &param_i); + res_vi = vec_ldl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1567,7 +1574,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vui = vec_ldl(0, &param_ui); + res_vui = vec_ldl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1579,7 +1586,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vf = vec_ldl(0, &param_f); + res_vf = vec_ldl(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1587,7 +1594,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vsc = vec_lvxl(0, &param_sc); + res_vsc = vec_lvxl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1599,7 +1606,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vuc = vec_lvxl(0, &param_uc); + res_vuc = vec_lvxl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1607,7 +1614,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vs = vec_lvxl(0, &param_s); + res_vs = vec_lvxl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1615,7 +1622,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vus = vec_lvxl(0, &param_us); + res_vus = vec_lvxl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1631,7 +1638,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vi = vec_lvxl(0, &param_i); + res_vi = vec_lvxl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1639,7 +1646,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vui = vec_lvxl(0, &param_ui); + res_vui = vec_lvxl(0, param_ui_ld); //
CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1651,7 +1658,7 @@ void test6() { // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl - res_vf = vec_lvxl(0, &param_f); + res_vf = vec_lvxl(0, param_f_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK-LE: @llvm.ppc.altivec.lvxl @@ -1665,12 +1672,12 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vlogefp /* vec_lvsl */ - res_vuc = vec_lvsl(0, &param_i); + res_vuc = vec_lvsl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.lvsl /* vec_lvsr */ - res_vuc = vec_lvsr(0, &param_i); + res_vuc = vec_lvsr(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvsr // CHECK-LE: @llvm.ppc.altivec.lvsr @@ -6029,7 +6036,7 @@ void test6() { // CHECK-LE: insertelement <4 x float> /* vec_lvlx */ - res_vsc = vec_lvlx(0, &param_sc); + res_vsc = vec_lvlx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6049,7 +6056,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvlx(0, &param_uc); + res_vuc = vec_lvlx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6079,7 +6086,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvlx(0, &param_s); + res_vs = vec_lvlx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6099,7 +6106,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvlx(0, &param_us); + res_vus = vec_lvlx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6139,7 +6146,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvlx(0, &param_i); + res_vi = vec_lvlx(0,
param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6159,7 +6166,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvlx(0, &param_ui); + res_vui = vec_lvlx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6200,7 +6207,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_lvlxl */ - res_vsc = vec_lvlxl(0, &param_sc); + res_vsc = vec_lvlxl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6220,7 +6227,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvlxl(0, &param_uc); + res_vuc = vec_lvlxl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6250,7 +6257,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvlxl(0, &param_s); + res_vs = vec_lvlxl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6270,7 +6277,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvlxl(0, &param_us); + res_vus = vec_lvlxl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6310,7 +6317,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvlxl(0, &param_i); + res_vi = vec_lvlxl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6330,7 +6337,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvlxl(0,
&param_ui); + res_vui = vec_lvlxl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6371,7 +6378,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_lvrx */ - res_vsc = vec_lvrx(0, &param_sc); + res_vsc = vec_lvrx(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6391,7 +6398,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvrx(0, &param_uc); + res_vuc = vec_lvrx(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6421,7 +6428,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvrx(0, &param_s); + res_vs = vec_lvrx(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6441,7 +6448,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvrx(0, &param_us); + res_vus = vec_lvrx(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6481,7 +6488,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvrx(0, &param_i); + res_vi = vec_lvrx(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6501,7 +6508,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvrx(0, &param_ui); + res_vui = vec_lvrx(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvx // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6542,7 +6549,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.vperm /* vec_lvrxl */ - res_vsc = vec_lvrxl(0, &param_sc); +
res_vsc = vec_lvrxl(0, param_sc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6562,7 +6569,7 @@ void test6() { // CHECK-LE: store <16 x i8> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vuc = vec_lvrxl(0, &param_uc); + res_vuc = vec_lvrxl(0, param_uc_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <16 x i8> zeroinitializer @@ -6592,7 +6599,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vs = vec_lvrxl(0, &param_s); + res_vs = vec_lvrxl(0, param_s_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6612,7 +6619,7 @@ void test6() { // CHECK-LE: store <8 x i16> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vus = vec_lvrxl(0, &param_us); + res_vus = vec_lvrxl(0, param_us_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <8 x i16> zeroinitializer @@ -6652,7 +6659,7 @@ void test6() { // CHECK-LE: @llvm.ppc.altivec.lvsl // CHECK-LE: @llvm.ppc.altivec.vperm - res_vi = vec_lvrxl(0, &param_i); + res_vi = vec_lvrxl(0, param_i_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -6672,7 +6679,7 @@ void test6() { // CHECK-LE: store <4 x i32> zeroinitializer // CHECK-LE: @llvm.ppc.altivec.vperm - res_vui = vec_lvrxl(0, &param_ui); + res_vui = vec_lvrxl(0, param_ui_ld); // CHECK: @llvm.ppc.altivec.lvxl // CHECK: @llvm.ppc.altivec.lvsl // CHECK: store <4 x i32> zeroinitializer @@ -9354,31 +9361,31 @@ void test8() { void test9() { // CHECK-LABEL: define void @test9 // CHECK-LE-LABEL: define void @test9 - res_vsc = vec_xl(param_sll, &param_sc); + res_vsc = vec_xl(param_sll, param_sc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 - res_vuc = vec_xl(param_sll, &param_uc); + res_vuc =
vec_xl(param_sll, param_uc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 - res_vs = vec_xl(param_sll, &param_s); + res_vs = vec_xl(param_sll, param_s_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 - res_vus = vec_xl(param_sll, &param_us); + res_vus = vec_xl(param_sll, param_us_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 - res_vi = vec_xl(param_sll, &param_i); + res_vi = vec_xl(param_sll, param_i_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 - res_vui = vec_xl(param_sll, &param_ui); + res_vui = vec_xl(param_sll, param_ui_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 - res_vf = vec_xl(param_sll, &param_f); + res_vf = vec_xl(param_sll, param_f_ld); // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 // CHECK-LE: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 } @@ -9420,35 +9427,35 @@ void test10() { void test11() { // CHECK-LABEL: define void @test11 // CHECK-LE-LABEL: define void @test11 - res_vsc = vec_xl_be(param_sll, &param_sc); + res_vsc = vec_xl_be(param_sll, param_sc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> - res_vuc = vec_xl_be(param_sll, &param_uc); + res_vuc = vec_xl_be(param_sll, param_uc_ld); // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> - res_vs = vec_xl_be(param_sll, &param_s); + res_vs = vec_xl_be(param_sll, param_s_ld); // CHECK: load <8 x i16>,
<8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> - res_vus = vec_xl_be(param_sll, &param_us); + res_vus = vec_xl_be(param_sll, param_us_ld); // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> - res_vi = vec_xl_be(param_sll, &param_i); + res_vi = vec_xl_be(param_sll, param_i_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) - res_vui = vec_xl_be(param_sll, &param_ui); + res_vui = vec_xl_be(param_sll, param_ui_ld); // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) - res_vf = vec_xl_be(param_sll, &param_f); + res_vf = vec_xl_be(param_sll, param_f_ld); // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) } diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c index ac766e264b2da..c850ebd1c70f8 100644 --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -23,11 +23,15 @@ vector double vda, vdb; signed int *iap; unsigned int uia, uib, *uiap; signed char *cap; -unsigned char uca, *ucap; -signed short *sap; -unsigned short usa, *usap; -signed long long *llap, llb; -unsigned long long ulla, *ullap; +unsigned char uca; +const unsigned char *ucap; +const signed short *sap; +unsigned short usa; +const unsigned short *usap; +const signed long long *llap; +signed long long llb; +unsigned long long ulla; +const unsigned long long *ullap; vector signed long long test_vec_mul_sll(void) { // CHECK: mul <2 x i64> diff --git a/clang/test/CodeGen/builtins-ppc-xl-xst.c
b/clang/test/CodeGen/builtins-ppc-xl-xst.c index 8ad45376e9779..226e9d8aff4e6 100644 --- a/clang/test/CodeGen/builtins-ppc-xl-xst.c +++ b/clang/test/CodeGen/builtins-ppc-xl-xst.c @@ -17,10 +17,12 @@ // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <8 x i16>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i16*, align 8 // CHECK-NEXT: store <8 x i16>* [[C:%.*]], <8 x i16>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i16* [[ST:%.*]], i16** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i16* [[LD:%.*]], i16** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i16* [[TMP0]], i16** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** [[__PTR_ADDR_I]], align 8 @@ -35,7 +37,7 @@ // CHECK-NEXT: store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[ST_ADDR]], align 8 // CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i16* [[TMP10]], i16** [[__PTR_ADDR_I2]], align 8 @@ -50,9 +52,9 @@ // CHECK-NEXT: store <8 x i16> [[TMP14]], <8 x i16>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test1(vector signed short *c, signed short *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test1(vector 
signed short *c, signed short *st, const signed short *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test2( @@ -65,10 +67,12 @@ void test1(vector signed short *c, signed short *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <8 x i16>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i16*, align 8 // CHECK-NEXT: store <8 x i16>* [[C:%.*]], <8 x i16>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i16* [[ST:%.*]], i16** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i16* [[LD:%.*]], i16** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i16*, i16** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i16* [[TMP0]], i16** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i16*, i16** [[__PTR_ADDR_I]], align 8 @@ -83,7 +87,7 @@ void test1(vector signed short *c, signed short *ptr) { // CHECK-NEXT: store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>*, <8 x i16>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i16*, i16** [[ST_ADDR]], align 8 // CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i16* [[TMP10]], i16** [[__PTR_ADDR_I2]], align 8 @@ -98,9 +102,10 @@ void test1(vector signed short *c, signed short *ptr) { // CHECK-NEXT: store <8 x i16> [[TMP14]], <8 x i16>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void 
test2(vector unsigned short *c, unsigned short *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test2(vector unsigned short *c, unsigned short *st, + const unsigned short *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test3( @@ -113,10 +118,12 @@ void test2(vector unsigned short *c, unsigned short *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x i32>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store <4 x i32>* [[C:%.*]], <4 x i32>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i32* [[ST:%.*]], i32** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i32* [[LD:%.*]], i32** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i32* [[TMP0]], i32** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[__PTR_ADDR_I]], align 8 @@ -131,7 +138,7 @@ void test2(vector unsigned short *c, unsigned short *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[ST_ADDR]], align 8 // CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i32* [[TMP10]], i32** [[__PTR_ADDR_I2]], align 8 @@ -146,9 +153,9 @@ void 
test2(vector unsigned short *c, unsigned short *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test3(vector signed int *c, signed int *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test3(vector signed int *c, signed int *st, const signed int *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test4( @@ -161,10 +168,12 @@ void test3(vector signed int *c, signed int *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x i32>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store <4 x i32>* [[C:%.*]], <4 x i32>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i32* [[ST:%.*]], i32** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i32* [[LD:%.*]], i32** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i32* [[TMP0]], i32** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[__PTR_ADDR_I]], align 8 @@ -179,7 +188,7 @@ void test3(vector signed int *c, signed int *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>*, <4 x i32>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32*, i32** [[ST_ADDR]], align 8 // CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* 
[[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i32* [[TMP10]], i32** [[__PTR_ADDR_I2]], align 8 @@ -194,9 +203,9 @@ void test3(vector signed int *c, signed int *ptr) { // CHECK-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test4(vector unsigned int *c, unsigned int *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test4(vector unsigned int *c, unsigned int *st, const unsigned int *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test5( @@ -209,10 +218,12 @@ void test4(vector unsigned int *c, unsigned int *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i64*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x i64>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i64*, align 8 // CHECK-NEXT: store <2 x i64>* [[C:%.*]], <2 x i64>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64* [[ST:%.*]], i64** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i64* [[LD:%.*]], i64** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i64* [[TMP0]], i64** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[__PTR_ADDR_I]], align 8 @@ -227,7 +238,7 @@ void test4(vector unsigned int *c, unsigned int *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[ST_ADDR]], 
align 8 // CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i64* [[TMP10]], i64** [[__PTR_ADDR_I2]], align 8 @@ -242,9 +253,10 @@ void test4(vector unsigned int *c, unsigned int *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test5(vector signed long long *c, signed long long *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test5(vector signed long long *c, signed long long *st, + const signed long long *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test6( @@ -257,10 +269,12 @@ void test5(vector signed long long *c, signed long long *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i64*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x i64>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca i64*, align 8 // CHECK-NEXT: store <2 x i64>* [[C:%.*]], <2 x i64>** [[C_ADDR]], align 8 -// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store i64* [[ST:%.*]], i64** [[ST_ADDR]], align 8 +// CHECK-NEXT: store i64* [[LD:%.*]], i64** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store i64* [[TMP0]], i64** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[__PTR_ADDR_I]], align 8 @@ -275,7 +289,7 @@ void test5(vector signed long long *c, signed long long *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>*, <2 x i64>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, 
<2 x i64>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i64*, i64** [[ST_ADDR]], align 8 // CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store i64* [[TMP10]], i64** [[__PTR_ADDR_I2]], align 8 @@ -290,9 +304,10 @@ void test5(vector signed long long *c, signed long long *ptr) { // CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test6(vector unsigned long long *c, unsigned long long *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test6(vector unsigned long long *c, unsigned long long *st, + const unsigned long long *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test7( @@ -305,10 +320,12 @@ void test6(vector unsigned long long *c, unsigned long long *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <4 x float>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: store <4 x float>* [[C:%.*]], <4 x float>** [[C_ADDR]], align 8 -// CHECK-NEXT: store float* [[PTR:%.*]], float** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load float*, float** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store float* [[ST:%.*]], float** [[ST_ADDR]], align 8 +// CHECK-NEXT: store float* [[LD:%.*]], float** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load float*, float** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-NEXT: store float* [[TMP0]], float** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[__PTR_ADDR_I]], align 8 @@ -323,7 +340,7 @@ void test6(vector unsigned long 
long *c, unsigned long long *ptr) { // CHECK-NEXT: store <4 x float> [[TMP6]], <4 x float>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>*, <4 x float>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load float*, float** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load float*, float** [[ST_ADDR]], align 8 // CHECK-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store float* [[TMP10]], float** [[__PTR_ADDR_I2]], align 8 @@ -338,9 +355,9 @@ void test6(vector unsigned long long *c, unsigned long long *ptr) { // CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test7(vector float *c, float *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test7(vector float *c, float *st, const float *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-LABEL: @test8( @@ -353,10 +370,12 @@ void test7(vector float *c, float *ptr) { // CHECK-NEXT: [[__PTR_ADDR_I:%.*]] = alloca double*, align 8 // CHECK-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x double>*, align 8 -// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK-NEXT: [[ST_ADDR:%.*]] = alloca double*, align 8 +// CHECK-NEXT: [[LD_ADDR:%.*]] = alloca double*, align 8 // CHECK-NEXT: store <2 x double>* [[C:%.*]], <2 x double>** [[C_ADDR]], align 8 -// CHECK-NEXT: store double* [[PTR:%.*]], double** [[PTR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-NEXT: store double* [[ST:%.*]], double** [[ST_ADDR]], align 8 +// CHECK-NEXT: store double* [[LD:%.*]], double** [[LD_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load double*, double** [[LD_ADDR]], align 8 // CHECK-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // 
CHECK-NEXT: store double* [[TMP0]], double** [[__PTR_ADDR_I]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[__PTR_ADDR_I]], align 8 @@ -371,7 +390,7 @@ void test7(vector float *c, float *ptr) { // CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 16 // CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>*, <2 x double>** [[C_ADDR]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load double*, double** [[ST_ADDR]], align 8 // CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[__VEC_ADDR_I]], align 16 // CHECK-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-NEXT: store double* [[TMP10]], double** [[__PTR_ADDR_I2]], align 8 @@ -386,9 +405,9 @@ void test7(vector float *c, float *ptr) { // CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[TMP16]], align 1 // CHECK-NEXT: ret void // -void test8(vector double *c, double *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test8(vector double *c, double *st, const double *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } #ifdef __POWER8_VECTOR__ @@ -402,10 +421,12 @@ void test8(vector double *c, double *ptr) { // CHECK-P8-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-P8-NEXT: [[C_ADDR:%.*]] = alloca <1 x i128>*, align 8 -// CHECK-P8-NEXT: [[PTR_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[ST_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[LD_ADDR:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: store <1 x i128>* [[C:%.*]], <1 x i128>** [[C_ADDR]], align 8 -// CHECK-P8-NEXT: store i128* [[PTR:%.*]], i128** [[PTR_ADDR]], align 8 -// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[ST:%.*]], i128** [[ST_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* 
[[LD:%.*]], i128** [[LD_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[LD_ADDR]], align 8 // CHECK-P8-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-P8-NEXT: store i128* [[TMP0]], i128** [[__PTR_ADDR_I]], align 8 // CHECK-P8-NEXT: [[TMP1:%.*]] = load i128*, i128** [[__PTR_ADDR_I]], align 8 @@ -420,7 +441,7 @@ void test8(vector double *c, double *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP6]], <1 x i128>* [[TMP7]], align 16 // CHECK-P8-NEXT: [[TMP8:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8 // CHECK-P8-NEXT: [[TMP9:%.*]] = load <1 x i128>, <1 x i128>* [[TMP8]], align 16 -// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[ST_ADDR]], align 8 // CHECK-P8-NEXT: store <1 x i128> [[TMP9]], <1 x i128>* [[__VEC_ADDR_I]], align 16 // CHECK-P8-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-P8-NEXT: store i128* [[TMP10]], i128** [[__PTR_ADDR_I2]], align 8 @@ -435,9 +456,10 @@ void test8(vector double *c, double *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP14]], <1 x i128>* [[TMP16]], align 1 // CHECK-P8-NEXT: ret void // -void test9(vector signed __int128 *c, signed __int128 *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test9(vector signed __int128 *c, signed __int128 *st, + const signed __int128 *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } // CHECK-P8-LABEL: @test10( @@ -450,10 +472,12 @@ void test9(vector signed __int128 *c, signed __int128 *ptr) { // CHECK-P8-NEXT: [[__PTR_ADDR_I:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: [[__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK-P8-NEXT: [[C_ADDR:%.*]] = alloca <1 x i128>*, align 8 -// CHECK-P8-NEXT: [[PTR_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[ST_ADDR:%.*]] = alloca i128*, align 8 +// CHECK-P8-NEXT: [[LD_ADDR:%.*]] = alloca i128*, align 8 // CHECK-P8-NEXT: store <1 x i128>* [[C:%.*]], <1 x i128>** [[C_ADDR]], align 8 -// 
CHECK-P8-NEXT: store i128* [[PTR:%.*]], i128** [[PTR_ADDR]], align 8 -// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[ST:%.*]], i128** [[ST_ADDR]], align 8 +// CHECK-P8-NEXT: store i128* [[LD:%.*]], i128** [[LD_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP0:%.*]] = load i128*, i128** [[LD_ADDR]], align 8 // CHECK-P8-NEXT: store i64 3, i64* [[__OFFSET_ADDR_I]], align 8 // CHECK-P8-NEXT: store i128* [[TMP0]], i128** [[__PTR_ADDR_I]], align 8 // CHECK-P8-NEXT: [[TMP1:%.*]] = load i128*, i128** [[__PTR_ADDR_I]], align 8 @@ -468,7 +492,7 @@ void test9(vector signed __int128 *c, signed __int128 *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP6]], <1 x i128>* [[TMP7]], align 16 // CHECK-P8-NEXT: [[TMP8:%.*]] = load <1 x i128>*, <1 x i128>** [[C_ADDR]], align 8 // CHECK-P8-NEXT: [[TMP9:%.*]] = load <1 x i128>, <1 x i128>* [[TMP8]], align 16 -// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[PTR_ADDR]], align 8 +// CHECK-P8-NEXT: [[TMP10:%.*]] = load i128*, i128** [[ST_ADDR]], align 8 // CHECK-P8-NEXT: store <1 x i128> [[TMP9]], <1 x i128>* [[__VEC_ADDR_I]], align 16 // CHECK-P8-NEXT: store i64 7, i64* [[__OFFSET_ADDR_I1]], align 8 // CHECK-P8-NEXT: store i128* [[TMP10]], i128** [[__PTR_ADDR_I2]], align 8 @@ -483,8 +507,9 @@ void test9(vector signed __int128 *c, signed __int128 *ptr) { // CHECK-P8-NEXT: store <1 x i128> [[TMP14]], <1 x i128>* [[TMP16]], align 1 // CHECK-P8-NEXT: ret void // -void test10(vector unsigned __int128 *c, unsigned __int128 *ptr) { - *c = vec_xl(3ll, ptr); - vec_xst(*c, 7ll, ptr); +void test10(vector unsigned __int128 *c, unsigned __int128 *st, + const unsigned __int128 *ld) { + *c = vec_xl(3ll, ld); + vec_xst(*c, 7ll, st); } #endif From 1eae19a87f5432a60020d5a6affb0dc28a2a661b Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 4 Sep 2020 12:23:49 -0700 Subject: [PATCH 219/465] [compiler-rt] Try to correct test after 3f1a9b7eca0 added segment names to objdump output --- 
compiler-rt/test/asan/TestCases/Darwin/cstring_section.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c b/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c index 952d6fcdd4656..958d7777111c4 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c +++ b/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c @@ -6,9 +6,9 @@ // Check that "Hello.\n" is in __asan_cstring and not in __cstring. // CHECK: Contents of section __asan_cstring: // CHECK: 48656c6c {{.*}} Hello. -// CHECK: Contents of section __const: +// CHECK: Contents of section {{.*}}__const: // CHECK-NOT: 48656c6c {{.*}} Hello. -// CHECK: Contents of section __cstring: +// CHECK: Contents of section {{.*}}__cstring: // CHECK-NOT: 48656c6c {{.*}} Hello. int main(int argc, char *argv[]) { From 97866b8de81ce71cf9ae9e50feb450335b0537a0 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Fri, 4 Sep 2020 12:25:01 -0700 Subject: [PATCH 220/465] [ThinLTO][Legacy] Fix StringRef assertion from ThinLTO bots This is a presumed fix for FireFox thinLTO bot fix which hits assertion failure for invalid index when access StringRef. Techinically, `IRName` in the symtab should not be empty string for the entries we cared about but this will help to fix the bot before more information can be provided. Otherwise, NFCI. --- llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 4adc9a22a7b2d..14dae848b3624 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -276,7 +276,7 @@ static void computeGUIDPreservedSymbols(const lto::InputFile &File, // Iterate the symbols in the input file and if the input has preserved symbol // compute the GUID for the symbol. 
for (const auto &Sym : File.symbols()) { - if (PreservedSymbols.count(Sym.getName())) + if (PreservedSymbols.count(Sym.getName()) && !Sym.getIRName().empty()) GUIDs.insert(GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( Sym.getIRName(), GlobalValue::ExternalLinkage, ""))); } From 6b50ce3ac95362bfe12aaa2b7bdbbef250f225fb Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 4 Sep 2020 22:07:36 +0200 Subject: [PATCH 221/465] [SCEV] Add tests for min/max intrinsics (NFC) --- .../ScalarEvolution/minmax-intrinsics.ll | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll diff --git a/llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll b/llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll new file mode 100644 index 0000000000000..c0395c328fca3 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s + +declare i32 @llvm.umax.i32(i32, i32) +declare i32 @llvm.umin.i32(i32, i32) +declare i32 @llvm.smax.i32(i32, i32) +declare i32 @llvm.smin.i32(i32, i32) + +define i32 @umax(i32 %x, i32 %y) { +; CHECK-LABEL: 'umax' +; CHECK-NEXT: Classifying expressions for: @umax +; CHECK-NEXT: %z = call i32 @llvm.umax.i32(i32 %x, i32 %y) +; CHECK-NEXT: --> %z U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @umax +; + %z = call i32 @llvm.umax.i32(i32 %x, i32 %y) + ret i32 %z +} + +define i32 @umin(i32 %x, i32 %y) { +; CHECK-LABEL: 'umin' +; CHECK-NEXT: Classifying expressions for: @umin +; CHECK-NEXT: %z = call i32 @llvm.umin.i32(i32 %x, i32 %y) +; CHECK-NEXT: --> %z U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @umin +; + %z = call i32 @llvm.umin.i32(i32 %x, i32 %y) + ret i32 %z 
+} + +define i32 @smax(i32 %x, i32 %y) { +; CHECK-LABEL: 'smax' +; CHECK-NEXT: Classifying expressions for: @smax +; CHECK-NEXT: %z = call i32 @llvm.smax.i32(i32 %x, i32 %y) +; CHECK-NEXT: --> %z U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @smax +; + %z = call i32 @llvm.smax.i32(i32 %x, i32 %y) + ret i32 %z +} + +define i32 @smin(i32 %x, i32 %y) { +; CHECK-LABEL: 'smin' +; CHECK-NEXT: Classifying expressions for: @smin +; CHECK-NEXT: %z = call i32 @llvm.smin.i32(i32 %x, i32 %y) +; CHECK-NEXT: --> %z U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @smin +; + %z = call i32 @llvm.smin.i32(i32 %x, i32 %y) + ret i32 %z +} + +define i32 @clamp(i32 %x) { +; CHECK-LABEL: 'clamp' +; CHECK-NEXT: Classifying expressions for: @clamp +; CHECK-NEXT: %y = call i32 @llvm.umax.i32(i32 %x, i32 10) +; CHECK-NEXT: --> %y U: full-set S: full-set +; CHECK-NEXT: %z = call i32 @llvm.umin.i32(i32 %y, i32 20) +; CHECK-NEXT: --> %z U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @clamp +; + %y = call i32 @llvm.umax.i32(i32 %x, i32 10) + %z = call i32 @llvm.umin.i32(i32 %y, i32 20) + ret i32 %z +} From 3b64052a2572e69355969a59a0c4c8aba4fee887 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 4 Sep 2020 12:27:40 -0700 Subject: [PATCH 222/465] [ORC] Fix some bugs in TPCDynamicLibrarySearchGenerator, use in llvm-jitlink. TPCDynamicLibrarySearchGenerator was generating errors on missing symbols, but that doesn't fit the DefinitionGenerator contract: A symbol that isn't generated by a particular generator should not cause an error. This commit fixes the error by using SymbolLookupFlags::WeaklyReferencedSymbol for all elements of the lookup, and switches llvm-jitlink to use TPCDynamicLibrarySearchGenerator. 
--- .../Orc/TPCDynamicLibrarySearchGenerator.h | 15 ++++++--- .../Orc/TargetProcessControl.h | 5 ++- .../Orc/TPCDynamicLibrarySearchGenerator.cpp | 32 +++++++++++++++---- .../Orc/TargetProcessControl.cpp | 6 ++-- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 12 ++----- 5 files changed, 46 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h b/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h index d35c8abc84a2e..7c1b72befde76 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h @@ -14,6 +14,7 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H #define LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" namespace llvm { @@ -21,6 +22,8 @@ namespace orc { class TPCDynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { public: + using SymbolPredicate = unique_function; + /// Create a DynamicLibrarySearchGenerator that searches for symbols in the /// library with the given handle. /// @@ -28,19 +31,22 @@ class TPCDynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { /// will be searched for. If the predicate is not given then all symbols will /// be searched for. TPCDynamicLibrarySearchGenerator(TargetProcessControl &TPC, - TargetProcessControl::DylibHandle H) - : TPC(TPC), H(H) {} + TargetProcessControl::DylibHandle H, + SymbolPredicate Allow = SymbolPredicate()) + : TPC(TPC), H(H), Allow(std::move(Allow)) {} /// Permanently loads the library at the given path and, on success, returns /// a DynamicLibrarySearchGenerator that will search it for symbol definitions /// in the library. On failure returns the reason the library failed to load. 
static Expected> - Load(TargetProcessControl &TPC, const char *LibraryPath); + Load(TargetProcessControl &TPC, const char *LibraryPath, + SymbolPredicate Allow = SymbolPredicate()); /// Creates a TPCDynamicLibrarySearchGenerator that searches for symbols in /// the target process. static Expected> - GetForTargetProcess(TargetProcessControl &TPC) { + GetForTargetProcess(TargetProcessControl &TPC, + SymbolPredicate Allow = SymbolPredicate()) { return Load(TPC, nullptr); } @@ -51,6 +57,7 @@ class TPCDynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { private: TargetProcessControl &TPC; TargetProcessControl::DylibHandle H; + SymbolPredicate Allow; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h index 159b6e8d56df3..d3349753284e2 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h @@ -149,8 +149,11 @@ class TargetProcessControl { virtual Expected loadDylib(const char *DylibPath) = 0; /// Search for symbols in the target process. + /// /// The result of the lookup is a 2-dimentional array of target addresses - /// that correspond to the lookup order. + /// that correspond to the lookup order. If a required symbol is not + /// found then this method will return an error. If a weakly referenced + /// symbol is not found then it be assigned a '0' value in the result. 
virtual Expected lookupSymbols(LookupRequest Request) = 0; protected: diff --git a/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp b/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp index 18de5b616eec8..d85f3c38feb9d 100644 --- a/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp @@ -13,12 +13,14 @@ namespace orc { Expected> TPCDynamicLibrarySearchGenerator::Load(TargetProcessControl &TPC, - const char *LibraryPath) { + const char *LibraryPath, + SymbolPredicate Allow) { auto Handle = TPC.loadDylib(LibraryPath); if (!Handle) return Handle.takeError(); - return std::make_unique(TPC, *Handle); + return std::make_unique(TPC, *Handle, + std::move(Allow)); } Error TPCDynamicLibrarySearchGenerator::tryToGenerate( @@ -28,22 +30,38 @@ Error TPCDynamicLibrarySearchGenerator::tryToGenerate( if (Symbols.empty()) return Error::success(); + SymbolLookupSet LookupSymbols; + + for (auto &KV : Symbols) { + // Skip symbols that don't match the filter. 
+ if (Allow && !Allow(KV.first)) + continue; + LookupSymbols.add(KV.first, SymbolLookupFlags::WeaklyReferencedSymbol); + } + SymbolMap NewSymbols; - TargetProcessControl::LookupRequestElement Request(H, Symbols); + TargetProcessControl::LookupRequestElement Request(H, LookupSymbols); auto Result = TPC.lookupSymbols(Request); if (!Result) return Result.takeError(); assert(Result->size() == 1 && "Results for more than one library returned"); - assert(Result->front().size() == Symbols.size() && + assert(Result->front().size() == LookupSymbols.size() && "Result has incorrect number of elements"); + SymbolNameVector MissingSymbols; auto ResultI = Result->front().begin(); - for (auto &KV : Symbols) - NewSymbols[KV.first] = - JITEvaluatedSymbol(*ResultI++, JITSymbolFlags::Exported); + for (auto &KV : LookupSymbols) + if (*ResultI) + NewSymbols[KV.first] = + JITEvaluatedSymbol(*ResultI++, JITSymbolFlags::Exported); + + // If there were no resolved symbols bail out. + if (NewSymbols.empty()) + return Error::success(); + // Define resolved symbols. return JD.define(absoluteSymbols(std::move(NewSymbols))); } diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp index 59c9ce2393c92..1e7736d1f40db 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp @@ -78,14 +78,14 @@ SelfTargetProcessControl::lookupSymbols(LookupRequest Request) { auto &Sym = KV.first; std::string Tmp((*Sym).data() + !!GlobalManglingPrefix, (*Sym).size() - !!GlobalManglingPrefix); - if (void *Addr = Dylib->getAddressOfSymbol(Tmp.c_str())) - R.back().push_back(pointerToJITTargetAddress(Addr)); - else if (KV.second == SymbolLookupFlags::RequiredSymbol) { + void *Addr = Dylib->getAddressOfSymbol(Tmp.c_str()); + if (!Addr && KV.second == SymbolLookupFlags::RequiredSymbol) { // FIXME: Collect all failing symbols before erroring out. 
SymbolNameVector MissingSymbols; MissingSymbols.push_back(Sym); return make_error(std::move(MissingSymbols)); } + R.back().push_back(pointerToJITTargetAddress(Addr)); } } diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index f1cc1f2550b31..a848bf029dbf0 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -17,6 +17,7 @@ #include "llvm/BinaryFormat/Magic.h" #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" @@ -30,7 +31,6 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" @@ -802,19 +802,13 @@ Error sanitizeArguments(const Session &S) { } Error loadProcessSymbols(Session &S) { - std::string ErrMsg; - if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr, &ErrMsg)) - return make_error(std::move(ErrMsg), inconvertibleErrorCode()); - - char GlobalPrefix = - S.TPC->getTargetTriple().getObjectFormat() == Triple::MachO ? 
'_' : '\0'; auto InternedEntryPointName = S.ES.intern(EntryPointName); auto FilterMainEntryPoint = [InternedEntryPointName](SymbolStringPtr Name) { return Name != InternedEntryPointName; }; S.MainJD->addGenerator( - ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( - GlobalPrefix, FilterMainEntryPoint))); + ExitOnErr(orc::TPCDynamicLibrarySearchGenerator::GetForTargetProcess( + *S.TPC, std::move(FilterMainEntryPoint)))); return Error::success(); } From 398ba37230431b6ab263cf91bf4c03e4f2c7acb8 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 4 Sep 2020 13:27:42 -0700 Subject: [PATCH 223/465] [LiveDebugVariables] Delete unneeded doInitialization --- llvm/lib/CodeGen/LiveDebugVariables.cpp | 4 ---- llvm/lib/CodeGen/LiveDebugVariables.h | 1 - 2 files changed, 5 deletions(-) diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index dd0a900fc2be0..97cc7a0c30343 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -1444,10 +1444,6 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { static_cast(pImpl)->emitDebugValues(VRM); } -bool LiveDebugVariables::doInitialization(Module &M) { - return Pass::doInitialization(M); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void LiveDebugVariables::dump() const { if (pImpl) diff --git a/llvm/lib/CodeGen/LiveDebugVariables.h b/llvm/lib/CodeGen/LiveDebugVariables.h index 74e738ec3e568..07dd3a83866fd 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/llvm/lib/CodeGen/LiveDebugVariables.h @@ -56,7 +56,6 @@ class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &) const override; - bool doInitialization(Module &) override; }; } // end namespace llvm From 052f83890349822a606c916b2fee501bc087652b Mon Sep 17 00:00:00 2001 From: Jan Korous 
Date: Tue, 1 Sep 2020 16:29:36 -0700 Subject: [PATCH 224/465] [libclang] Expose couple more AST details via cursors Differential Revision: https://reviews.llvm.org/D86991 --- clang/include/clang-c/Index.h | 20 ++++ clang/tools/libclang/CIndex.cpp | 36 ++++++++ clang/tools/libclang/libclang.exports | 3 + clang/unittests/libclang/LibclangTest.cpp | 106 ++++++++++++++++++++++ 4 files changed, 165 insertions(+) diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 5fa728d6d66c8..4838937c8da67 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2940,6 +2940,26 @@ CINDEX_LINKAGE int clang_getCursorPlatformAvailability( CINDEX_LINKAGE void clang_disposeCXPlatformAvailability(CXPlatformAvailability *availability); +/** + * If cursor refers to a variable declaration and it has initializer returns + * cursor referring to the initializer otherwise return null cursor. + */ +CINDEX_LINKAGE CXCursor clang_Cursor_getVarDeclInitializer(CXCursor cursor); + +/** + * If cursor refers to a variable declaration that has global storage returns 1. + * If cursor refers to a variable declaration that doesn't have global storage + * returns 0. Otherwise returns -1. + */ +CINDEX_LINKAGE int clang_Cursor_hasVarDeclGlobalStorage(CXCursor cursor); + +/** + * If cursor refers to a variable declaration that has external storage + * returns 1. If cursor refers to a variable declaration that doesn't have + * external storage returns 0. Otherwise returns -1. + */ +CINDEX_LINKAGE int clang_Cursor_hasVarDeclExternalStorage(CXCursor cursor); + /** * Describe the "language" of the entity referred to by a cursor. 
*/ diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 683b517d79fda..87138cd3b4a32 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -8851,6 +8851,42 @@ void clang::PrintLibclangResourceUsage(CXTranslationUnit TU) { clang_disposeCXTUResourceUsage(Usage); } +CXCursor clang_Cursor_getVarDeclInitializer(CXCursor cursor) { + const Decl *const D = getCursorDecl(cursor); + if (!D) + return clang_getNullCursor(); + const auto *const VD = dyn_cast(D); + if (!VD) + return clang_getNullCursor(); + const Expr *const Init = VD->getInit(); + if (!Init) + return clang_getNullCursor(); + + return cxcursor::MakeCXCursor(Init, VD, cxcursor::getCursorTU(cursor)); +} + +int clang_Cursor_hasVarDeclGlobalStorage(CXCursor cursor) { + const Decl *const D = getCursorDecl(cursor); + if (!D) + return -1; + const auto *const VD = dyn_cast(D); + if (!VD) + return -1; + + return VD->hasGlobalStorage(); +} + +int clang_Cursor_hasVarDeclExternalStorage(CXCursor cursor) { + const Decl *const D = getCursorDecl(cursor); + if (!D) + return -1; + const auto *const VD = dyn_cast(D); + if (!VD) + return -1; + + return VD->hasExternalStorage(); +} + //===----------------------------------------------------------------------===// // Misc. utility functions. 
//===----------------------------------------------------------------------===// diff --git a/clang/tools/libclang/libclang.exports b/clang/tools/libclang/libclang.exports index defbaa91a488c..618f99f348fb9 100644 --- a/clang/tools/libclang/libclang.exports +++ b/clang/tools/libclang/libclang.exports @@ -382,3 +382,6 @@ clang_PrintingPolicy_setProperty clang_PrintingPolicy_dispose clang_install_aborting_llvm_fatal_error_handler clang_uninstall_llvm_fatal_error_handler +clang_Cursor_getVarDeclInitializer +clang_Cursor_hasVarDeclGlobalStorage +clang_Cursor_hasVarDeclExternalStorage diff --git a/clang/unittests/libclang/LibclangTest.cpp b/clang/unittests/libclang/LibclangTest.cpp index e2e3a8e887bab..27fe10dfbb0f2 100644 --- a/clang/unittests/libclang/LibclangTest.cpp +++ b/clang/unittests/libclang/LibclangTest.cpp @@ -736,3 +736,109 @@ TEST_F(LibclangSerializationTest, TokenKindsAreCorrectAfterLoading) { CheckTokenKinds(); } + +TEST_F(LibclangParseTest, clang_getVarDeclInitializer) { + std::string Main = "main.cpp"; + WriteFile(Main, "int foo() { return 5; }; const int a = foo();"); + ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0, nullptr, + 0, TUFlags); + + CXCursor C = clang_getTranslationUnitCursor(ClangTU); + clang_visitChildren( + C, + [](CXCursor cursor, CXCursor parent, + CXClientData client_data) -> CXChildVisitResult { + if (clang_getCursorKind(cursor) == CXCursor_VarDecl) { + const CXCursor Initializer = clang_Cursor_getVarDeclInitializer(cursor); + EXPECT_FALSE(clang_Cursor_isNull(Initializer)); + CXString Spelling = clang_getCursorSpelling(Initializer); + const char* const SpellingCSstr = clang_getCString(Spelling); + EXPECT_TRUE(SpellingCSstr); + EXPECT_EQ(std::string(SpellingCSstr), std::string("foo")); + clang_disposeString(Spelling); + return CXChildVisit_Break; + } + return CXChildVisit_Continue; + }, + nullptr); +} + +TEST_F(LibclangParseTest, clang_hasVarDeclGlobalStorageFalse) { + std::string Main = "main.cpp"; + 
WriteFile(Main, "void foo() { int a; }"); + ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0, nullptr, + 0, TUFlags); + + CXCursor C = clang_getTranslationUnitCursor(ClangTU); + clang_visitChildren( + C, + [](CXCursor cursor, CXCursor parent, + CXClientData client_data) -> CXChildVisitResult { + if (clang_getCursorKind(cursor) == CXCursor_VarDecl) { + EXPECT_FALSE(clang_Cursor_hasVarDeclGlobalStorage(cursor)); + return CXChildVisit_Break; + } + return CXChildVisit_Continue; + }, + nullptr); +} + +TEST_F(LibclangParseTest, clang_Cursor_hasVarDeclGlobalStorageTrue) { + std::string Main = "main.cpp"; + WriteFile(Main, "int a;"); + ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0, nullptr, + 0, TUFlags); + + CXCursor C = clang_getTranslationUnitCursor(ClangTU); + clang_visitChildren( + C, + [](CXCursor cursor, CXCursor parent, + CXClientData client_data) -> CXChildVisitResult { + if (clang_getCursorKind(cursor) == CXCursor_VarDecl) { + EXPECT_TRUE(clang_Cursor_hasVarDeclGlobalStorage(cursor)); + return CXChildVisit_Break; + } + return CXChildVisit_Continue; + }, + nullptr); +} + +TEST_F(LibclangParseTest, clang_Cursor_hasVarDeclExternalStorageFalse) { + std::string Main = "main.cpp"; + WriteFile(Main, "int a;"); + ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0, nullptr, + 0, TUFlags); + + CXCursor C = clang_getTranslationUnitCursor(ClangTU); + clang_visitChildren( + C, + [](CXCursor cursor, CXCursor parent, + CXClientData client_data) -> CXChildVisitResult { + if (clang_getCursorKind(cursor) == CXCursor_VarDecl) { + EXPECT_FALSE(clang_Cursor_hasVarDeclExternalStorage(cursor)); + return CXChildVisit_Break; + } + return CXChildVisit_Continue; + }, + nullptr); +} + +TEST_F(LibclangParseTest, clang_Cursor_hasVarDeclExternalStorageTrue) { + std::string Main = "main.cpp"; + WriteFile(Main, "extern int a;"); + ClangTU = clang_parseTranslationUnit(Index, Main.c_str(), nullptr, 0, nullptr, + 0, TUFlags); + + CXCursor 
C = clang_getTranslationUnitCursor(ClangTU); + clang_visitChildren( + C, + [](CXCursor cursor, CXCursor parent, + CXClientData client_data) -> CXChildVisitResult { + if (clang_getCursorKind(cursor) == CXCursor_VarDecl) { + EXPECT_TRUE(clang_Cursor_hasVarDeclExternalStorage(cursor)); + return CXChildVisit_Break; + } + return CXChildVisit_Continue; + }, + nullptr); +} From b3e139444f41e5c281972b4002c09ee0500831f6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 4 Sep 2020 22:41:25 +0200 Subject: [PATCH 225/465] [BDCE] Add tests for min/max intrinsincs (NFC) --- llvm/test/Transforms/BDCE/intrinsics.ll | 83 +++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 llvm/test/Transforms/BDCE/intrinsics.ll diff --git a/llvm/test/Transforms/BDCE/intrinsics.ll b/llvm/test/Transforms/BDCE/intrinsics.ll new file mode 100644 index 0000000000000..5a186f01fd298 --- /dev/null +++ b/llvm/test/Transforms/BDCE/intrinsics.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -bdce < %s | FileCheck %s + +declare i8 @llvm.umax.i8(i8, i8) +declare i8 @llvm.umin.i8(i8, i8) +declare i8 @llvm.smax.i8(i8, i8) +declare i8 @llvm.smin.i8(i8, i8) + +define i8 @umax(i8 %x, i8 %y, i1 %a, i1 %b) { +; CHECK-LABEL: @umax( +; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] +; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X2]], i8 [[Y2]]) +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[M]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %a2 = zext i1 %a to i8 + %b2 = zext i1 %b to i8 + %x2 = or i8 %x, %a2 + %y2 = or i8 %y, %b2 + %m = call i8 @llvm.umax.i8(i8 %x2, i8 %y2) + %r = lshr i8 %m, 1 + ret i8 %r +} + +define i8 @umin(i8 %x, i8 %y, i1 %a, i1 %b) { +; CHECK-LABEL: @umin( +; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; 
CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] +; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X2]], i8 [[Y2]]) +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[M]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %a2 = zext i1 %a to i8 + %b2 = zext i1 %b to i8 + %x2 = or i8 %x, %a2 + %y2 = or i8 %y, %b2 + %m = call i8 @llvm.umin.i8(i8 %x2, i8 %y2) + %r = lshr i8 %m, 1 + ret i8 %r +} + +define i8 @smax(i8 %x, i8 %y, i1 %a, i1 %b) { +; CHECK-LABEL: @smax( +; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] +; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X2]], i8 [[Y2]]) +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[M]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %a2 = zext i1 %a to i8 + %b2 = zext i1 %b to i8 + %x2 = or i8 %x, %a2 + %y2 = or i8 %y, %b2 + %m = call i8 @llvm.smax.i8(i8 %x2, i8 %y2) + %r = lshr i8 %m, 1 + ret i8 %r +} + +define i8 @smin(i8 %x, i8 %y, i1 %a, i1 %b) { +; CHECK-LABEL: @smin( +; CHECK-NEXT: [[A2:%.*]] = zext i1 [[A:%.*]] to i8 +; CHECK-NEXT: [[B2:%.*]] = zext i1 [[B:%.*]] to i8 +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], [[A2]] +; CHECK-NEXT: [[Y2:%.*]] = or i8 [[Y:%.*]], [[B2]] +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X2]], i8 [[Y2]]) +; CHECK-NEXT: [[R:%.*]] = lshr i8 [[M]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %a2 = zext i1 %a to i8 + %b2 = zext i1 %b to i8 + %x2 = or i8 %x, %a2 + %y2 = or i8 %y, %b2 + %m = call i8 @llvm.smin.i8(i8 %x2, i8 %y2) + %r = lshr i8 %m, 1 + ret i8 %r +} From c5d6b2b7e5769abfe9500c6dd6da89cd09a0348c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 4 Sep 2020 11:14:48 -0400 Subject: [PATCH 226/465] [InstCombine] add test for assume in block with unreachable (PR47416); NFC --- llvm/test/Transforms/InstCombine/assume.ll | 117 +++++++++++---------- 1 file changed, 63 insertions(+), 54 deletions(-) diff --git 
a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 6f33e83ee3362..f8a7bb01ff64f 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -1,66 +1,56 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +declare void @llvm.assume(i1) #1 + +; Check that the alignment has been upgraded and that the assume has not +; been removed: + define i32 @foo1(i32* %a) #0 { ; CHECK-LABEL: @foo1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 32 +; CHECK-NEXT: [[T0:%.*]] = load i32, i32* [[A:%.*]], align 32 ; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 ; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 ; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) -; CHECK-NEXT: ret i32 [[TMP0]] +; CHECK-NEXT: ret i32 [[T0]] ; -entry: - %0 = load i32, i32* %a, align 4 - -; Check that the alignment has been upgraded and that the assume has not -; been removed: - + %t0 = load i32, i32* %a, align 4 %ptrint = ptrtoint i32* %a to i64 %maskedptr = and i64 %ptrint, 31 %maskcond = icmp eq i64 %maskedptr, 0 tail call void @llvm.assume(i1 %maskcond) - - ret i32 %0 + ret i32 %t0 } +; Same check as in @foo1, but make sure it works if the assume is first too. 
+ define i32 @foo2(i32* %a) #0 { ; CHECK-LABEL: @foo2( -; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A:%.*]] to i64 ; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 ; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 32 -; CHECK-NEXT: ret i32 [[TMP0]] +; CHECK-NEXT: [[T0:%.*]] = load i32, i32* [[A]], align 32 +; CHECK-NEXT: ret i32 [[T0]] ; -entry: -; Same check as in @foo1, but make sure it works if the assume is first too. - %ptrint = ptrtoint i32* %a to i64 %maskedptr = and i64 %ptrint, 31 %maskcond = icmp eq i64 %maskedptr, 0 tail call void @llvm.assume(i1 %maskcond) - - %0 = load i32, i32* %a, align 4 - ret i32 %0 + %t0 = load i32, i32* %a, align 4 + ret i32 %t0 } -declare void @llvm.assume(i1) #1 - define i32 @simple(i32 %a) #1 { ; CHECK-LABEL: @simple( -; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 4 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: ret i32 4 ; -entry: - - %cmp = icmp eq i32 %a, 4 tail call void @llvm.assume(i1 %cmp) ret i32 %a @@ -68,72 +58,55 @@ entry: define i32 @can1(i1 %a, i1 %b, i1 %c) { ; CHECK-LABEL: @can1( -; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.assume(i1 [[A:%.*]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[B:%.*]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[C:%.*]]) ; CHECK-NEXT: ret i32 5 ; -entry: %and1 = and i1 %a, %b %and = and i1 %and1, %c tail call void @llvm.assume(i1 %and) - - ret i32 5 } define i32 @can2(i1 %a, i1 %b, i1 %c) { ; CHECK-LABEL: @can2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[A:%.*]], true -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[B:%.*]], true +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[A:%.*]], true ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[B:%.*]], true +; CHECK-NEXT: call void 
@llvm.assume(i1 [[TMP2]]) ; CHECK-NEXT: ret i32 5 ; -entry: %v = or i1 %a, %b %w = xor i1 %v, 1 tail call void @llvm.assume(i1 %w) - - ret i32 5 } define i32 @bar1(i32 %a) #0 { ; CHECK-LABEL: @bar1( -; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 7 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: ret i32 1 ; -entry: %and1 = and i32 %a, 3 - - %and = and i32 %a, 7 %cmp = icmp eq i32 %and, 1 tail call void @llvm.assume(i1 %cmp) - ret i32 %and1 } define i32 @bar2(i32 %a) #0 { ; CHECK-LABEL: @bar2( -; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 7 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: ret i32 1 ; -entry: - %and = and i32 %a, 7 %cmp = icmp eq i32 %and, 1 tail call void @llvm.assume(i1 %cmp) - %and1 = and i32 %a, 3 ret i32 %and1 } @@ -176,15 +149,11 @@ define i32 @bar4(i32 %a, i32 %b) { ; entry: %and1 = and i32 %b, 3 - - %and = and i32 %a, 7 %cmp = icmp eq i32 %and, 1 tail call void @llvm.assume(i1 %cmp) - %cmp2 = icmp eq i32 %a, %b tail call void @llvm.assume(i1 %cmp2) - ret i32 %and1 } @@ -377,10 +346,10 @@ define i32 @assumption_conflicts_with_known_bits(i32 %a, i32 %b) { define void @debug_interference(i8 %x) { ; CHECK-LABEL: @debug_interference( ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i8 [[X:%.*]], 0 -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9 +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7:metadata !.*]], metadata !DIExpression()), [[DBG9:!dbg !.*]] ; CHECK-NEXT: tail call void @llvm.assume(i1 false) -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9 -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9 +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7]], 
metadata !DIExpression()), [[DBG9]] +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7]], metadata !DIExpression()), [[DBG9]] ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP2]]) ; CHECK-NEXT: ret void ; @@ -532,7 +501,6 @@ define void @always_true_assumption() { ; call void @llvm.assume(i1 true) ret void - } ; The alloca guarantees that the low bits of %a are zero because of alignment. @@ -588,6 +556,47 @@ define void @PR36270(i32 %b) { unreachable } +; PR47416 + +define i32 @unreachable_assume(i32 %x, i32 %y) { +; CHECK-LABEL: @unreachable_assume( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[OR]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[IF:%.*]], label [[EXIT:%.*]] +; CHECK: if: +; CHECK-NEXT: [[A:%.*]] = and i32 [[Y]], -2 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[A]], 104 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP3]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP4]]) +; CHECK-NEXT: unreachable +; +entry: + %cmp0 = icmp sgt i32 %x, 1 + %cmp1 = icmp eq i32 %y, 1 + %or = or i1 %cmp0, %cmp1 + tail call void @llvm.assume(i1 %or) + %cmp2 = icmp eq i32 %x, 1 + br i1 %cmp2, label %if, label %exit + +if: + %a = and i32 %y, -2 + %cmp3 = icmp ne i32 %a, 104 + tail call void @llvm.assume(i1 %cmp3) + br label %exit + +exit: + %cmp4 = icmp eq i32 %x, 2 + tail call void @llvm.assume(i1 %cmp4) + unreachable +} + declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} From 35c6d56c0437f79b0f472b22e7353727a433881c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 4 Sep 2020 16:56:21 -0400 Subject: [PATCH 227/465] [InstCombine] rename tmp values to avoid scripted FileCheck 
conflicts; NFC --- .../InstCombine/pr33689_same_bitwidth.ll | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll index 35f0e5caadc6e..0ffd41d60a4c8 100644 --- a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll +++ b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll @@ -13,41 +13,41 @@ target datalayout = "p:16:16" define void @f(i1 %cond) { ; CHECK-LABEL: @f( ; CHECK-NEXT: bb0: -; CHECK-NEXT: [[TMP12:%.*]] = alloca [2 x i32], align 8 -; CHECK-NEXT: [[TMP12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP12]], i16 0, i16 0 +; CHECK-NEXT: [[T12:%.*]] = alloca [2 x i32], align 8 +; CHECK-NEXT: [[T12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[T12]], i16 0, i16 0 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint [2 x i32]* [[TMP12]] to i16 -; CHECK-NEXT: store i16 [[TMP8]], i16* @a, align 2 +; CHECK-NEXT: [[T8:%.*]] = ptrtoint [2 x i32]* [[T12]] to i16 +; CHECK-NEXT: store i16 [[T8]], i16* @a, align 2 ; CHECK-NEXT: unreachable ; CHECK: bb2: -; CHECK-NEXT: [[TMP9:%.*]] = load i16*, i16** @b, align 2 -; CHECK-NEXT: store i16 0, i16* [[TMP9]], align 2 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP12_SUB]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -1 -; CHECK-NEXT: store i32 [[TMP11]], i32* [[TMP12_SUB]], align 8 +; CHECK-NEXT: [[T9:%.*]] = load i16*, i16** @b, align 2 +; CHECK-NEXT: store i16 0, i16* [[T9]], align 2 +; CHECK-NEXT: [[T10:%.*]] = load i32, i32* [[T12_SUB]], align 8 +; CHECK-NEXT: [[T11:%.*]] = add i32 [[T10]], -1 +; CHECK-NEXT: store i32 [[T11]], i32* [[T12_SUB]], align 8 ; CHECK-NEXT: ret void ; bb0: - %tmp1 = alloca %i64_t - %tmp2 = bitcast %i64_t* %tmp1 to i32* - %useless3 = bitcast %i64_t* %tmp1 to i16* + %t1 = alloca %i64_t + %t2 = bitcast %i64_t* %t1 to i32* + 
%useless3 = bitcast %i64_t* %t1 to i16* %useless4 = getelementptr inbounds i16, i16* %useless3, i16 undef %useless5 = bitcast i16* %useless4 to i32* br i1 %cond, label %bb1, label %bb2 bb1: ; preds = %bb0 - %useless6 = insertvalue [1 x i32*] undef, i32* %tmp2, 0 + %useless6 = insertvalue [1 x i32*] undef, i32* %t2, 0 %useless7 = insertvalue [1 x i32*] %useless6, i32* null, 0 - %tmp8 = ptrtoint i32* %tmp2 to i16 - store i16 %tmp8, i16* @a + %t8 = ptrtoint i32* %t2 to i16 + store i16 %t8, i16* @a unreachable bb2: ; preds = %bb0 - %tmp9 = load i16*, i16** @b - store i16 0, i16* %tmp9 - %tmp10 = load i32, i32* %tmp2 - %tmp11 = sub i32 %tmp10, 1 - store i32 %tmp11, i32* %tmp2 + %t9 = load i16*, i16** @b + store i16 0, i16* %t9 + %t10 = load i32, i32* %t2 + %t11 = sub i32 %t10, 1 + store i32 %t11, i32* %t2 ret void } From 35b35a373d013df8e80c0c9840c085aa6a79c4dc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 4 Sep 2020 13:52:26 -0700 Subject: [PATCH 228/465] [X86] Prevent shuffle combining from creating an identical X86ISD::SHUF128. This can cause an infinite loop if SimplifiedDemandedElts asks for the node to replace itself. A similar protection exists in other places in shuffle combining. Fixes ISPC https://github.com/ispc/ispc/issues/1864 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 + .../CodeGen/X86/vector-shuffle-512-v16.ll | 44 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 517e6c0931805..1212585b4baf5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34909,6 +34909,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (Mask[1] < 0 || Mask[3] < 0 || Mask[1] == (Mask[3] % 2)); if (!isAnyZero(Mask) && !PreferPERMQ) { + if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128) + return SDValue(); // Nothing to do! 
if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG)) return DAG.getBitcast(RootVT, V); } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll index e9f4aa99f148b..4fce1a38a754a 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -764,3 +764,47 @@ define <16 x float> @mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_ %res = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> ret <16 x float> %res } + +%struct.foo = type { [4 x double], [3 x [4 x double]], [4 x double] } + +; This test previously hung in shuffle combining. https://github.com/ispc/ispc/issues/1864 +define void @ispc_1864(<16 x float>* %arg) { +; ALL-LABEL: ispc_1864: +; ALL: # %bb.0: # %bb +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .cfi_def_cfa_offset 16 +; ALL-NEXT: .cfi_offset %rbp, -16 +; ALL-NEXT: movq %rsp, %rbp +; ALL-NEXT: .cfi_def_cfa_register %rbp +; ALL-NEXT: andq $-64, %rsp +; ALL-NEXT: subq $4864, %rsp # imm = 0x1300 +; ALL-NEXT: vbroadcastss {{.*#+}} ymm0 = [-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0,-5.0E+0] +; ALL-NEXT: vmulps 32(%rdi), %ymm0, %ymm0 +; ALL-NEXT: vcvtps2pd %ymm0, %zmm0 +; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,4,5,0,1,0,1] +; ALL-NEXT: vmovapd %ymm0, {{[0-9]+}}(%rsp) +; ALL-NEXT: movq %rbp, %rsp +; ALL-NEXT: popq %rbp +; ALL-NEXT: .cfi_def_cfa %rsp, 8 +; ALL-NEXT: vzeroupper +; ALL-NEXT: retq +bb: + %tmp = alloca [30 x %struct.foo], align 64 + %tmp1 = load <16 x float>, <16 x float>* %arg, align 4 + %tmp2 = fmul <16 x float> %tmp1, + %tmp3 = fpext <16 x float> %tmp2 to <16 x double> + %tmp4 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 0 + %tmp5 = extractelement <16 x double> %tmp3, i32 10 + store double %tmp5, double* %tmp4, align 32 + %tmp6 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 1 + %tmp7 = 
extractelement <16 x double> %tmp3, i32 11 + store double %tmp7, double* %tmp6, align 8 + %tmp8 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 2 + %tmp9 = extractelement <16 x double> %tmp3, i32 12 + store double %tmp9, double* %tmp8, align 16 + %tmp10 = getelementptr inbounds [30 x %struct.foo], [30 x %struct.foo]* %tmp, i64 0, i64 3, i32 2, i64 3 + %tmp11 = extractelement <16 x double> %tmp3, i32 13 + store double %tmp11, double* %tmp10, align 8 + ret void +} + From 69e5abb57b70570cf04671a93246e5e624023650 Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Fri, 4 Sep 2020 12:18:49 -0700 Subject: [PATCH 229/465] [libclang] Add CXRewriter to libclang API Differential Revision: https://reviews.llvm.org/D86992 --- clang/include/clang-c/Rewrite.h | 63 ++++++++++++++++ clang/tools/libclang/CMakeLists.txt | 1 + clang/tools/libclang/Rewrite.cpp | 63 ++++++++++++++++ clang/tools/libclang/libclang.exports | 7 ++ clang/unittests/libclang/LibclangTest.cpp | 88 +++++++++++++++++++++++ 5 files changed, 222 insertions(+) create mode 100644 clang/include/clang-c/Rewrite.h create mode 100644 clang/tools/libclang/Rewrite.cpp diff --git a/clang/include/clang-c/Rewrite.h b/clang/include/clang-c/Rewrite.h new file mode 100644 index 0000000000000..ce1b05594b384 --- /dev/null +++ b/clang/include/clang-c/Rewrite.h @@ -0,0 +1,63 @@ +/*===-- clang-c/Rewrite.h - C CXRewriter --------------------------*- C -*-===*\ +|* *| +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| +|* Exceptions. *| +|* See https://llvm.org/LICENSE.txt for license information. 
*| +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| +|* *| +|*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_CLANG_C_REWRITE_H +#define LLVM_CLANG_C_REWRITE_H + +#include "clang-c/CXString.h" +#include "clang-c/ExternC.h" +#include "clang-c/Index.h" +#include "clang-c/Platform.h" + +LLVM_CLANG_C_EXTERN_C_BEGIN + +typedef void *CXRewriter; + +/** + * Create CXRewriter. + */ +CINDEX_LINKAGE CXRewriter clang_CXRewriter_create(CXTranslationUnit TU); + +/** + * Insert the specified string at the specified location in the original buffer. + */ +CINDEX_LINKAGE void clang_CXRewriter_insertTextBefore(CXRewriter Rew, CXSourceLocation Loc, + const char *Insert); + +/** + * Replace the specified range of characters in the input with the specified + * replacement. + */ +CINDEX_LINKAGE void clang_CXRewriter_replaceText(CXRewriter Rew, CXSourceRange ToBeReplaced, + const char *Replacement); + +/** + * Remove the specified range. + */ +CINDEX_LINKAGE void clang_CXRewriter_removeText(CXRewriter Rew, CXSourceRange ToBeRemoved); + +/** + * Save all changed files to disk. + * Returns 1 if any files were not saved successfully, returns 0 otherwise. + */ +CINDEX_LINKAGE int clang_CXRewriter_overwriteChangedFiles(CXRewriter Rew); + +/** + * Write out rewritten version of the main file to stdout. + */ +CINDEX_LINKAGE void clang_CXRewriter_writeMainFileToStdOut(CXRewriter Rew); + +/** + * Free the given CXRewriter. 
+ */ +CINDEX_LINKAGE void clang_CXRewriter_dispose(CXRewriter Rew); + +LLVM_CLANG_C_EXTERN_C_END + +#endif diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt index a4077140acee8..4e2c19da0f7c4 100644 --- a/clang/tools/libclang/CMakeLists.txt +++ b/clang/tools/libclang/CMakeLists.txt @@ -20,6 +20,7 @@ set(SOURCES CXType.cpp Indexing.cpp FatalErrorHandler.cpp + Rewrite.cpp ADDITIONAL_HEADERS CIndexDiagnostic.h diff --git a/clang/tools/libclang/Rewrite.cpp b/clang/tools/libclang/Rewrite.cpp new file mode 100644 index 0000000000000..389232d97acc1 --- /dev/null +++ b/clang/tools/libclang/Rewrite.cpp @@ -0,0 +1,63 @@ +//===- Rewrite.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang-c/Rewrite.h" +#include "CXSourceLocation.h" +#include "CXTranslationUnit.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Frontend/ASTUnit.h" +#include "clang/Rewrite/Core/Rewriter.h" + +CXRewriter clang_CXRewriter_create(CXTranslationUnit TU) { + if (clang::cxtu::isNotUsableTU(TU)) { + LOG_BAD_TU(TU); + return {}; + } + clang::ASTUnit *AU = clang::cxtu::getASTUnit(TU); + assert(AU); + return reinterpret_cast( + new clang::Rewriter(AU->getSourceManager(), AU->getLangOpts())); +} + +void clang_CXRewriter_insertTextBefore(CXRewriter Rew, CXSourceLocation Loc, + const char *Insert) { + assert(Rew); + clang::Rewriter &R = *reinterpret_cast(Rew); + R.InsertTextBefore(clang::cxloc::translateSourceLocation(Loc), Insert); +} + +void clang_CXRewriter_replaceText(CXRewriter Rew, CXSourceRange ToBeReplaced, + const char *Replacement) { + assert(Rew); + clang::Rewriter &R = *reinterpret_cast(Rew); + 
R.ReplaceText(clang::cxloc::translateCXRangeToCharRange(ToBeReplaced), + Replacement); +} + +void clang_CXRewriter_removeText(CXRewriter Rew, CXSourceRange ToBeRemoved) { + assert(Rew); + clang::Rewriter &R = *reinterpret_cast(Rew); + R.RemoveText(clang::cxloc::translateCXRangeToCharRange(ToBeRemoved)); +} + +int clang_CXRewriter_overwriteChangedFiles(CXRewriter Rew) { + assert(Rew); + clang::Rewriter &R = *reinterpret_cast(Rew); + return R.overwriteChangedFiles(); +} + +void clang_CXRewriter_writeMainFileToStdOut(CXRewriter Rew) { + assert(Rew); + clang::Rewriter &R = *reinterpret_cast(Rew); + R.getEditBuffer(R.getSourceMgr().getMainFileID()).write(llvm::outs()); +} + +void clang_CXRewriter_dispose(CXRewriter Rew) { + if (Rew) + delete reinterpret_cast(Rew); +} diff --git a/clang/tools/libclang/libclang.exports b/clang/tools/libclang/libclang.exports index 618f99f348fb9..528424713a9a5 100644 --- a/clang/tools/libclang/libclang.exports +++ b/clang/tools/libclang/libclang.exports @@ -385,3 +385,10 @@ clang_uninstall_llvm_fatal_error_handler clang_Cursor_getVarDeclInitializer clang_Cursor_hasVarDeclGlobalStorage clang_Cursor_hasVarDeclExternalStorage +clang_CXRewriter_create +clang_CXRewriter_insertTextBefore +clang_CXRewriter_replaceText +clang_CXRewriter_removeText +clang_CXRewriter_overwriteChangedFiles +clang_CXRewriter_writeMainFileToStdOut +clang_CXRewriter_dispose diff --git a/clang/unittests/libclang/LibclangTest.cpp b/clang/unittests/libclang/LibclangTest.cpp index 27fe10dfbb0f2..fc3ad43b495cf 100644 --- a/clang/unittests/libclang/LibclangTest.cpp +++ b/clang/unittests/libclang/LibclangTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang-c/Index.h" +#include "clang-c/Rewrite.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" @@ -842,3 +843,90 @@ TEST_F(LibclangParseTest, clang_Cursor_hasVarDeclExternalStorageTrue) { }, nullptr); } +class 
LibclangRewriteTest : public LibclangParseTest { +public: + CXRewriter Rew = nullptr; + std::string Filename; + CXFile File = nullptr; + + void SetUp() override { + LibclangParseTest::SetUp(); + Filename = "file.cpp"; + WriteFile(Filename, "int main() { return 0; }"); + ClangTU = clang_parseTranslationUnit(Index, Filename.c_str(), nullptr, 0, + nullptr, 0, TUFlags); + Rew = clang_CXRewriter_create(ClangTU); + File = clang_getFile(ClangTU, Filename.c_str()); + } + void TearDown() override { + clang_CXRewriter_dispose(Rew); + LibclangParseTest::TearDown(); + } +}; + +static std::string getFileContent(const std::string& Filename) { + std::ifstream RewrittenFile(Filename); + std::string RewrittenFileContent; + std::string Line; + while (std::getline(RewrittenFile, Line)) { + if (RewrittenFileContent.empty()) + RewrittenFileContent = Line; + else { + RewrittenFileContent += "\n" + Line; + } + } + return RewrittenFileContent; +} + +TEST_F(LibclangRewriteTest, RewriteReplace) { + CXSourceLocation B = clang_getLocation(ClangTU, File, 1, 5); + CXSourceLocation E = clang_getLocation(ClangTU, File, 1, 9); + CXSourceRange Rng = clang_getRange(B, E); + + clang_CXRewriter_replaceText(Rew, Rng, "MAIN"); + + ASSERT_EQ(clang_CXRewriter_overwriteChangedFiles(Rew), 0); + EXPECT_EQ(getFileContent(Filename), "int MAIN() { return 0; }"); +} + +TEST_F(LibclangRewriteTest, RewriteReplaceShorter) { + CXSourceLocation B = clang_getLocation(ClangTU, File, 1, 5); + CXSourceLocation E = clang_getLocation(ClangTU, File, 1, 9); + CXSourceRange Rng = clang_getRange(B, E); + + clang_CXRewriter_replaceText(Rew, Rng, "foo"); + + ASSERT_EQ(clang_CXRewriter_overwriteChangedFiles(Rew), 0); + EXPECT_EQ(getFileContent(Filename), "int foo() { return 0; }"); +} + +TEST_F(LibclangRewriteTest, RewriteReplaceLonger) { + CXSourceLocation B = clang_getLocation(ClangTU, File, 1, 5); + CXSourceLocation E = clang_getLocation(ClangTU, File, 1, 9); + CXSourceRange Rng = clang_getRange(B, E); + + 
clang_CXRewriter_replaceText(Rew, Rng, "patatino"); + + ASSERT_EQ(clang_CXRewriter_overwriteChangedFiles(Rew), 0); + EXPECT_EQ(getFileContent(Filename), "int patatino() { return 0; }"); +} + +TEST_F(LibclangRewriteTest, RewriteInsert) { + CXSourceLocation Loc = clang_getLocation(ClangTU, File, 1, 5); + + clang_CXRewriter_insertTextBefore(Rew, Loc, "ro"); + + ASSERT_EQ(clang_CXRewriter_overwriteChangedFiles(Rew), 0); + EXPECT_EQ(getFileContent(Filename), "int romain() { return 0; }"); +} + +TEST_F(LibclangRewriteTest, RewriteRemove) { + CXSourceLocation B = clang_getLocation(ClangTU, File, 1, 5); + CXSourceLocation E = clang_getLocation(ClangTU, File, 1, 9); + CXSourceRange Rng = clang_getRange(B, E); + + clang_CXRewriter_removeText(Rew, Rng); + + ASSERT_EQ(clang_CXRewriter_overwriteChangedFiles(Rew), 0); + EXPECT_EQ(getFileContent(Filename), "int () { return 0; }"); +} From b07cbccf286831f5d32dddfd4e1da36bf48cbe58 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 4 Sep 2020 21:18:33 +0000 Subject: [PATCH 230/465] [gn build] Port 69e5abb57b7 --- llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn index 5c97bebf137f0..1078ec8f2f430 100644 --- a/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/tools/libclang/BUILD.gn @@ -89,6 +89,7 @@ target(libclang_target_type, "libclang") { "FatalErrorHandler.cpp", "Index_Internal.h", "Indexing.cpp", + "Rewrite.cpp", ] if (host_os == "mac") { ldflags = [ From 781a43840863b85603a710857691a9b5032b0c27 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 4 Sep 2020 23:40:54 +0200 Subject: [PATCH 231/465] [InstSimplify] Add tests for min/max with dominating condition (NFC) --- .../InstSimplify/maxmin_intrinsics.ll | 161 ++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git 
a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index c2c0f9ebf82dd..d1d711c1c1bd1 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -2133,3 +2133,164 @@ define i8 @umax_lshr(i8 %x, i8 %y) { %max = call i8 @llvm.umax.i8(i8 %x, i8 %shr) ret i8 %max } + +define i8 @umax_dom_cond_uge(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_dom_cond_uge( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M1]] +; CHECK: false: +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %cmp = icmp uge i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @umax_dom_cond_ugt(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_dom_cond_ugt( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M1]] +; CHECK: false: +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %cmp = icmp ugt i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @umax_dom_cond_ule(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_dom_cond_ule( +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], 
i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M1]] +; CHECK: false: +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %cmp = icmp ule i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @umax_dom_cond_ult(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_dom_cond_ult( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M1]] +; CHECK: false: +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %cmp = icmp ult i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @umin_dom_cond_uge(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_dom_cond_uge( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M1]] +; CHECK: false: +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %cmp = icmp uge i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.umin.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.umin.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @smax_dom_cond_sge(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_dom_cond_sge( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 
[[M1]] +; CHECK: false: +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %cmp = icmp sge i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.smax.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.smax.i8(i8 %x, i8 %y) + ret i8 %m2 +} + +define i8 @smin_dom_cond_sge(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_dom_cond_sge( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M1]] +; CHECK: false: +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 [[Y]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %cmp = icmp sge i8 %x, %y + br i1 %cmp, label %true, label %false + +true: + %m1 = call i8 @llvm.smin.i8(i8 %x, i8 %y) + ret i8 %m1 + +false: + %m2 = call i8 @llvm.smin.i8(i8 %x, i8 %y) + ret i8 %m2 +} From 7cfc8f0c7c2440ea8aa722304f9e6ef32472833b Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Fri, 4 Sep 2020 14:53:38 -0700 Subject: [PATCH 232/465] [libclang] Add missing dependency on clangRewrite lib Differential Revision: https://reviews.llvm.org/D86992 --- clang/tools/libclang/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt index 4e2c19da0f7c4..c3b9ab6ffb9b0 100644 --- a/clang/tools/libclang/CMakeLists.txt +++ b/clang/tools/libclang/CMakeLists.txt @@ -42,6 +42,7 @@ set(LIBS clangFrontend clangIndex clangLex + clangRewrite clangSema clangSerialization clangTooling From 2bccd2b4350f887cc7fea1cc488690f58186c440 Mon Sep 17 00:00:00 2001 From: Pengxuan Zheng Date: Fri, 4 Sep 2020 13:56:06 -0700 Subject: [PATCH 233/465] [Driver] Allow -specs and -nostartfiles to be forwarded to GCC With 6a75496836ea14bcfd2f4b59d35a1cad4ac58cee, these two options are no longer forwarded to GCC. 
This patch restores the original behavior. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D87162 --- clang/include/clang/Driver/Options.td | 4 ++-- clang/test/Driver/gcc_forward.c | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5f1668e701f14..4ba5d40117e77 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2760,7 +2760,7 @@ def no_pie : Flag<["-"], "no-pie">, Alias; def noprebind : Flag<["-"], "noprebind">; def noprofilelib : Flag<["-"], "noprofilelib">; def noseglinkedit : Flag<["-"], "noseglinkedit">; -def nostartfiles : Flag<["-"], "nostartfiles">; +def nostartfiles : Flag<["-"], "nostartfiles">, Group; def nostdinc : Flag<["-"], "nostdinc">, Flags<[CoreOption]>; def nostdlibinc : Flag<["-"], "nostdlibinc">; def nostdincxx : Flag<["-"], "nostdinc++">, Flags<[CC1Option]>, @@ -2861,7 +2861,7 @@ def segs__read__ : Joined<["-"], "segs_read_">; def shared_libgcc : Flag<["-"], "shared-libgcc">; def shared : Flag<["-", "--"], "shared">, Group; def single__module : Flag<["-"], "single_module">; -def specs_EQ : Joined<["-", "--"], "specs=">; +def specs_EQ : Joined<["-", "--"], "specs=">, Group; def specs : Separate<["-", "--"], "specs">, Flags<[Unsupported]>; def static_libgcc : Flag<["-"], "static-libgcc">; def static_libstdcxx : Flag<["-"], "static-libstdc++">; diff --git a/clang/test/Driver/gcc_forward.c b/clang/test/Driver/gcc_forward.c index a99944f8f5336..e6b0670d1a027 100644 --- a/clang/test/Driver/gcc_forward.c +++ b/clang/test/Driver/gcc_forward.c @@ -1,7 +1,8 @@ // RUN: %clang -### %s -target aarch64-none-elf \ -// RUN: --coverage -e _start -fuse-ld=lld --ld-path=ld -nostdlib -r -rdynamic -static -static-pie \ +// RUN: --coverage -e _start -fuse-ld=lld --ld-path=ld -nostartfiles \ +// RUN: -nostdlib -r -rdynamic -specs=nosys.specs -static -static-pie \ // RUN: 2>&1 | FileCheck 
--check-prefix=FORWARD %s -// FORWARD: gcc{{[^"]*}}" "--coverage" "-fuse-ld=lld" "--ld-path=ld" "-nostdlib" "-rdynamic" "-static" "-static-pie" "-o" "a.out" "{{.*}}.o" "-e" "_start" "-r" +// FORWARD: gcc{{[^"]*}}" "--coverage" "-fuse-ld=lld" "--ld-path=ld" "-nostartfiles" "-nostdlib" "-rdynamic" "-specs=nosys.specs" "-static" "-static-pie" "-o" "a.out" "{{.*}}.o" "-e" "_start" "-r" // Check that we don't try to forward -Xclang or -mlinker-version to GCC. // PR12920 -- Check also we may not forward W_Group options to GCC. From 89a4fe79d4dde50a1daee1b433b1fa728ffeb0a4 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 4 Sep 2020 17:17:55 -0500 Subject: [PATCH 234/465] [Hexagon] Unindent everything in HexagonISelLowering.h, NFC Just a shift, no other formatting changes. --- llvm/lib/Target/Hexagon/HexagonISelLowering.h | 896 +++++++++--------- 1 file changed, 448 insertions(+), 448 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index d7a960fde0a20..c12c3f6cd8ee5 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -30,465 +30,465 @@ namespace llvm { namespace HexagonISD { - enum NodeType : unsigned { - OP_BEGIN = ISD::BUILTIN_OP_END, - - CONST32 = OP_BEGIN, - CONST32_GP, // For marking data present in GP. - ADDC, // Add with carry: (X, Y, Cin) -> (X+Y, Cout). - SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout). - ALLOCA, - - AT_GOT, // Index in GOT. - AT_PCREL, // Offset relative to PC. - - CALL, // Function call. - CALLnr, // Function call that does not return. - CALLR, - - RET_FLAG, // Return with a flag operand. - BARRIER, // Memory barrier. - JT, // Jump table. - CP, // Constant pool. - - COMBINE, - VSPLAT, // Generic splat, selection depends on argument/return - // types. 
- VASL, - VASR, - VLSR, - - TSTBIT, - INSERT, - EXTRACTU, - VEXTRACTW, - VINSERTW0, - VROR, - TC_RETURN, - EH_RETURN, - DCFETCH, - READCYCLE, - PTRUE, - PFALSE, - D2P, // Convert 8-byte value to 8-bit predicate register. [*] - P2D, // Convert 8-bit predicate register to 8-byte value. [*] - V2Q, // Convert HVX vector to a vector predicate reg. [*] - Q2V, // Convert vector predicate to an HVX vector. [*] - // [*] The equivalence is defined as "Q <=> (V != 0)", - // where the != operation compares bytes. - // Note: V != 0 is implemented as V >u 0. - QCAT, - QTRUE, - QFALSE, - VZERO, - VSPLATW, // HVX splat of a 32-bit word with an arbitrary result type. - TYPECAST, // No-op that's used to convert between different legal - // types in a register. - VALIGN, // Align two vectors (in Op0, Op1) to one that would have - // been loaded from address in Op2. - VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is - // an address in a vector load, then it's a no-op. - OP_END - }; +enum NodeType : unsigned { + OP_BEGIN = ISD::BUILTIN_OP_END, + + CONST32 = OP_BEGIN, + CONST32_GP, // For marking data present in GP. + ADDC, // Add with carry: (X, Y, Cin) -> (X+Y, Cout). + SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout). + ALLOCA, + + AT_GOT, // Index in GOT. + AT_PCREL, // Offset relative to PC. + + CALL, // Function call. + CALLnr, // Function call that does not return. + CALLR, + + RET_FLAG, // Return with a flag operand. + BARRIER, // Memory barrier. + JT, // Jump table. + CP, // Constant pool. + + COMBINE, + VSPLAT, // Generic splat, selection depends on argument/return + // types. + VASL, + VASR, + VLSR, + + TSTBIT, + INSERT, + EXTRACTU, + VEXTRACTW, + VINSERTW0, + VROR, + TC_RETURN, + EH_RETURN, + DCFETCH, + READCYCLE, + PTRUE, + PFALSE, + D2P, // Convert 8-byte value to 8-bit predicate register. [*] + P2D, // Convert 8-bit predicate register to 8-byte value. [*] + V2Q, // Convert HVX vector to a vector predicate reg. 
[*] + Q2V, // Convert vector predicate to an HVX vector. [*] + // [*] The equivalence is defined as "Q <=> (V != 0)", + // where the != operation compares bytes. + // Note: V != 0 is implemented as V >u 0. + QCAT, + QTRUE, + QFALSE, + VZERO, + VSPLATW, // HVX splat of a 32-bit word with an arbitrary result type. + TYPECAST, // No-op that's used to convert between different legal + // types in a register. + VALIGN, // Align two vectors (in Op0, Op1) to one that would have + // been loaded from address in Op2. + VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is + // an address in a vector load, then it's a no-op. + OP_END +}; } // end namespace HexagonISD - class HexagonSubtarget; - - class HexagonTargetLowering : public TargetLowering { - int VarArgsFrameOffset; // Frame offset to start of varargs area. - const HexagonTargetMachine &HTM; - const HexagonSubtarget &Subtarget; - - bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) - const; - - public: - explicit HexagonTargetLowering(const TargetMachine &TM, - const HexagonSubtarget &ST); - - bool isHVXVectorType(MVT Ty) const; - - /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. Targets which want to do tail call - /// optimization should implement this function. 
- bool IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, - bool isCallerStructRet, const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, SelectionDAG& DAG) const; - - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, - MachineFunction &MF, - unsigned Intrinsic) const override; - - bool isTruncateFree(Type *Ty1, Type *Ty2) const override; - bool isTruncateFree(EVT VT1, EVT VT2) const override; - - bool isCheapToSpeculateCttz() const override { return true; } - bool isCheapToSpeculateCtlz() const override { return true; } - bool isCtlzFast() const override { return true; } - - bool hasBitTest(SDValue X, SDValue Y) const override; - - bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; - - /// Return true if an FMA operation is faster than a pair of mul and add - /// instructions. fmuladd intrinsics will be expanded to FMAs when this - /// method returns true (and FMAs are legal), otherwise fmuladd is - /// expanded to mul + add. - bool isFMAFasterThanFMulAndFAdd(const MachineFunction &, - EVT) const override; - - // Should we expand the build vector with shuffles? 
- bool shouldExpandBuildVectorWithShuffles(EVT VT, - unsigned DefinedValues) const override; - - bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override; - TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) - const override; - - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - void LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const override; - void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const override; - - const char *getTargetNodeName(unsigned Opcode) const override; - - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; - SDValue 
LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue - LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const override; - SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG) const; - SDValue LowerToTLSInitialExecModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG) const; - SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG) const; - SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, - GlobalAddressSDNode *GA, SDValue InFlag, EVT PtrVT, - unsigned ReturnReg, unsigned char OperandFlags) const; - SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const override; - SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals, - const SmallVectorImpl &OutVals, - SDValue Callee) const; - - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - - bool CanLowerReturn(CallingConv::ID CallConv, - MachineFunction &MF, bool isVarArg, - const SmallVectorImpl &Outs, - LLVMContext &Context) const override; - - SDValue LowerReturn(SDValue Chain, CallingConv::ID 
CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SDLoc &dl, SelectionDAG &DAG) const override; - - SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - - bool mayBeEmittedAsTailCall(const CallInst *CI) const override; - - Register getRegisterByName(const char* RegName, LLT VT, - const MachineFunction &MF) const override; - - /// If a physical register, this returns the register that receives the - /// exception address on entry to an EH pad. - Register - getExceptionPointerRegister(const Constant *PersonalityFn) const override { - return Hexagon::R0; - } - - /// If a physical register, this returns the register that receives the - /// exception typeid on entry to a landing pad. - Register - getExceptionSelectorRegister(const Constant *PersonalityFn) const override { - return Hexagon::R1; - } - - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - - EVT getSetCCResultType(const DataLayout &, LLVMContext &C, - EVT VT) const override { - if (!VT.isVector()) - return MVT::i1; - else - return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); - } - - bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDValue &Base, SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const override; - - ConstraintType getConstraintType(StringRef Constraint) const override; - - std::pair - getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - StringRef Constraint, MVT VT) const override; - - unsigned - getInlineAsmMemConstraint(StringRef ConstraintCode) const override { - if (ConstraintCode == "o") - return InlineAsm::Constraint_o; - return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); - } - - // Intrinsics - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 
- SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; - /// isLegalAddressingMode - Return true if the addressing mode represented - /// by AM is legal for this target, for a load/store of the specified type. - /// The type may be VoidTy, in which case only return true if the addressing - /// mode is legal for a load/store of any legal type. - /// TODO: Handle pre/postinc as well. - bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, - Type *Ty, unsigned AS, - Instruction *I = nullptr) const override; - /// Return true if folding a constant offset with the given GlobalAddress - /// is legal. It is frequently not legal in PIC relocation models. - bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - - bool isFPImmLegal(const APFloat &Imm, EVT VT, - bool ForCodeSize) const override; - - /// isLegalICmpImmediate - Return true if the specified immediate is legal - /// icmp immediate, that is the target has icmp instructions which can - /// compare a register against the immediate without having to materialize - /// the immediate into a register. - bool isLegalICmpImmediate(int64_t Imm) const override; - - EVT getOptimalMemOpType(const MemOp &Op, - const AttributeList &FuncAttributes) const override; - - bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, - unsigned AddrSpace, Align Alignment, - MachineMemOperand::Flags Flags, - bool *Fast) const override; - - bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) - const override; - - /// Returns relocation base for the given PIC jumptable. - SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) - const override; - - bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, - EVT NewVT) const override; - - // Handling of atomic RMW instructions. 
- Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, - AtomicOrdering Ord) const override; - Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, - Value *Addr, AtomicOrdering Ord) const override; - AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; - bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - AtomicExpansionKind - shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; - - AtomicExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { - return AtomicExpansionKind::LLSC; - } - - private: - void initializeHVXLowering(); - unsigned getPreferredHvxVectorAction(MVT VecTy) const; - - void validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl, - unsigned NeedAlign) const; - - std::pair getBaseAndOffset(SDValue Addr) const; - - bool getBuildVectorConstInts(ArrayRef Values, MVT VecTy, - SelectionDAG &DAG, - MutableArrayRef Consts) const; - SDValue buildVector32(ArrayRef Elem, const SDLoc &dl, MVT VecTy, +class HexagonSubtarget; + +class HexagonTargetLowering : public TargetLowering { + int VarArgsFrameOffset; // Frame offset to start of varargs area. + const HexagonTargetMachine &HTM; + const HexagonSubtarget &Subtarget; + + bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) + const; + +public: + explicit HexagonTargetLowering(const TargetMachine &TM, + const HexagonSubtarget &ST); + + bool isHVXVectorType(MVT Ty) const; + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. 
+ bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, + bool isCallerStructRet, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, SelectionDAG& DAG) const; + + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, + unsigned Intrinsic) const override; + + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTruncateFree(EVT VT1, EVT VT2) const override; + + bool isCheapToSpeculateCttz() const override { return true; } + bool isCheapToSpeculateCtlz() const override { return true; } + bool isCtlzFast() const override { return true; } + + bool hasBitTest(SDValue X, SDValue Y) const override; + + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; + + /// Return true if an FMA operation is faster than a pair of mul and add + /// instructions. fmuladd intrinsics will be expanded to FMAs when this + /// method returns true (and FMAs are legal), otherwise fmuladd is + /// expanded to mul + add. + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &, + EVT) const override; + + // Should we expand the build vector with shuffles? 
+ bool shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const override; + + bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override; + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) + const override; + + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + void LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + + const char *getTargetNodeName(unsigned Opcode) const override; + + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + SDValue 
LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; + SDValue LowerToTLSInitialExecModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; + SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; + SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, + GlobalAddressSDNode *GA, SDValue InFlag, EVT PtrVT, + unsigned ReturnReg, unsigned char OperandFlags) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals, + const SmallVectorImpl &OutVals, + SDValue Callee) const; + + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + + bool CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID 
CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &dl, SelectionDAG &DAG) const override; + + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; + + Register getRegisterByName(const char* RegName, LLT VT, + const MachineFunction &MF) const override; + + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + Register + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return Hexagon::R0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + Register + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return Hexagon::R1; + } + + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + + EVT getSetCCResultType(const DataLayout &, LLVMContext &C, + EVT VT) const override { + if (!VT.isVector()) + return MVT::i1; + else + return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); + } + + bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; + + ConstraintType getConstraintType(StringRef Constraint) const override; + + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + + // Intrinsics + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + /// The type may be VoidTy, in which case only return true if the addressing + /// mode is legal for a load/store of any legal type. + /// TODO: Handle pre/postinc as well. + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS, + Instruction *I = nullptr) const override; + /// Return true if folding a constant offset with the given GlobalAddress + /// is legal. It is frequently not legal in PIC relocation models. + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; + + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. + bool isLegalICmpImmediate(int64_t Imm) const override; + + EVT getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const override; + + bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, + unsigned AddrSpace, Align Alignment, + MachineMemOperand::Flags Flags, + bool *Fast) const override; + + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, + unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) + const override; + + /// Returns relocation base for the given PIC jumptable. + SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) + const override; + + bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, + EVT NewVT) const override; + + // Handling of atomic RMW instructions. 
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const override; + Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const override; + AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + AtomicExpansionKind + shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { + return AtomicExpansionKind::LLSC; + } + +private: + void initializeHVXLowering(); + unsigned getPreferredHvxVectorAction(MVT VecTy) const; + + void validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl, + unsigned NeedAlign) const; + + std::pair getBaseAndOffset(SDValue Addr) const; + + bool getBuildVectorConstInts(ArrayRef Values, MVT VecTy, + SelectionDAG &DAG, + MutableArrayRef Consts) const; + SDValue buildVector32(ArrayRef Elem, const SDLoc &dl, MVT VecTy, + SelectionDAG &DAG) const; + SDValue buildVector64(ArrayRef Elem, const SDLoc &dl, MVT VecTy, + SelectionDAG &DAG) const; + SDValue extractVector(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ValTy, MVT ResTy, SelectionDAG &DAG) const; + SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, + const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const; + SDValue expandPredicate(SDValue Vec32, const SDLoc &dl, SelectionDAG &DAG) const; - SDValue buildVector64(ArrayRef Elem, const SDLoc &dl, MVT VecTy, - SelectionDAG &DAG) const; - SDValue extractVector(SDValue VecV, SDValue IdxV, const SDLoc &dl, - MVT ValTy, MVT ResTy, SelectionDAG &DAG) const; - SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, - const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const; - SDValue expandPredicate(SDValue Vec32, const SDLoc &dl, + SDValue contractPredicate(SDValue Vec64, const SDLoc &dl, SelectionDAG &DAG) const; - SDValue contractPredicate(SDValue Vec64, const SDLoc &dl, + 
SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const; + + bool isUndef(SDValue Op) const { + if (Op.isMachineOpcode()) + return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; + return Op.getOpcode() == ISD::UNDEF; + } + SDValue getInstr(unsigned MachineOpc, const SDLoc &dl, MVT Ty, + ArrayRef Ops, SelectionDAG &DAG) const { + SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops); + return SDValue(N, 0); + } + SDValue getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const; + + using VectorPair = std::pair; + using TypePair = std::pair; + + SDValue getInt(unsigned IntId, MVT ResTy, ArrayRef Ops, + const SDLoc &dl, SelectionDAG &DAG) const; + + MVT ty(SDValue Op) const { + return Op.getValueType().getSimpleVT(); + } + TypePair ty(const VectorPair &Ops) const { + return { Ops.first.getValueType().getSimpleVT(), + Ops.second.getValueType().getSimpleVT() }; + } + MVT tyScalar(MVT Ty) const { + if (!Ty.isVector()) + return Ty; + return MVT::getIntegerVT(Ty.getSizeInBits()); + } + MVT tyVector(MVT Ty, MVT ElemTy) const { + if (Ty.isVector() && Ty.getVectorElementType() == ElemTy) + return Ty; + unsigned TyWidth = Ty.getSizeInBits(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + assert((TyWidth % ElemWidth) == 0); + return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth); + } + + MVT typeJoin(const TypePair &Tys) const; + TypePair typeSplit(MVT Ty) const; + MVT typeExtElem(MVT VecTy, unsigned Factor) const; + MVT typeTruncElem(MVT VecTy, unsigned Factor) const; + + SDValue opJoin(const VectorPair &Ops, const SDLoc &dl, + SelectionDAG &DAG) const; + VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const; + SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const; + + bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags, + bool *Fast) const; + bool allowsHvxMisalignedMemoryAccesses(MVT VecTy, + MachineMemOperand::Flags Flags, + bool *Fast) const; + + bool isHvxSingleTy(MVT Ty) const; + bool isHvxPairTy(MVT 
Ty) const; + bool isHvxBoolTy(MVT Ty) const; + SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy, + SelectionDAG &DAG) const; + SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const; + SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1, + ArrayRef Mask, SelectionDAG &DAG) const; + + SDValue buildHvxVectorReg(ArrayRef Values, const SDLoc &dl, + MVT VecTy, SelectionDAG &DAG) const; + SDValue buildHvxVectorPred(ArrayRef Values, const SDLoc &dl, + MVT VecTy, SelectionDAG &DAG) const; + SDValue createHvxPrefixPred(SDValue PredV, const SDLoc &dl, + unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const; - SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const; - - bool isUndef(SDValue Op) const { - if (Op.isMachineOpcode()) - return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; - return Op.getOpcode() == ISD::UNDEF; - } - SDValue getInstr(unsigned MachineOpc, const SDLoc &dl, MVT Ty, - ArrayRef Ops, SelectionDAG &DAG) const { - SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops); - return SDValue(N, 0); - } - SDValue getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const; - - using VectorPair = std::pair; - using TypePair = std::pair; - - SDValue getInt(unsigned IntId, MVT ResTy, ArrayRef Ops, - const SDLoc &dl, SelectionDAG &DAG) const; - - MVT ty(SDValue Op) const { - return Op.getValueType().getSimpleVT(); - } - TypePair ty(const VectorPair &Ops) const { - return { Ops.first.getValueType().getSimpleVT(), - Ops.second.getValueType().getSimpleVT() }; - } - MVT tyScalar(MVT Ty) const { - if (!Ty.isVector()) - return Ty; - return MVT::getIntegerVT(Ty.getSizeInBits()); - } - MVT tyVector(MVT Ty, MVT ElemTy) const { - if (Ty.isVector() && Ty.getVectorElementType() == ElemTy) - return Ty; - unsigned TyWidth = Ty.getSizeInBits(); - unsigned ElemWidth = ElemTy.getSizeInBits(); - assert((TyWidth % ElemWidth) == 0); - return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth); - } - - MVT typeJoin(const TypePair &Tys) 
const; - TypePair typeSplit(MVT Ty) const; - MVT typeExtElem(MVT VecTy, unsigned Factor) const; - MVT typeTruncElem(MVT VecTy, unsigned Factor) const; - - SDValue opJoin(const VectorPair &Ops, const SDLoc &dl, - SelectionDAG &DAG) const; - VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const; - SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const; - - bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags, - bool *Fast) const; - bool allowsHvxMisalignedMemoryAccesses(MVT VecTy, - MachineMemOperand::Flags Flags, - bool *Fast) const; - - bool isHvxSingleTy(MVT Ty) const; - bool isHvxPairTy(MVT Ty) const; - bool isHvxBoolTy(MVT Ty) const; - SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy, - SelectionDAG &DAG) const; - SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const; - SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1, - ArrayRef Mask, SelectionDAG &DAG) const; - - SDValue buildHvxVectorReg(ArrayRef Values, const SDLoc &dl, - MVT VecTy, SelectionDAG &DAG) const; - SDValue buildHvxVectorPred(ArrayRef Values, const SDLoc &dl, - MVT VecTy, SelectionDAG &DAG) const; - SDValue createHvxPrefixPred(SDValue PredV, const SDLoc &dl, - unsigned BitBytes, bool ZeroFill, - SelectionDAG &DAG) const; - SDValue extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, + SDValue extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ResTy, SelectionDAG &DAG) const; + SDValue extractHvxElementPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ResTy, SelectionDAG &DAG) const; + SDValue insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV, + const SDLoc &dl, SelectionDAG &DAG) const; + SDValue insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV, + const SDLoc &dl, SelectionDAG &DAG) const; + SDValue extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const; - SDValue extractHvxElementPred(SDValue 
VecV, SDValue IdxV, const SDLoc &dl, + SDValue extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const; - SDValue insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV, + SDValue insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const; - SDValue insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV, + SDValue insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const; - SDValue extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, - MVT ResTy, SelectionDAG &DAG) const; - SDValue extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, - MVT ResTy, SelectionDAG &DAG) const; - SDValue insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV, - const SDLoc &dl, SelectionDAG &DAG) const; - SDValue insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV, - const SDLoc &dl, SelectionDAG &DAG) const; - SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy, - bool ZeroExt, SelectionDAG &DAG) const; - SDValue compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy, - SelectionDAG &DAG) const; + SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy, + bool ZeroExt, SelectionDAG &DAG) const; + SDValue compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy, + SelectionDAG &DAG) const; - SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const; - SDValue 
LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const; - - SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const; - SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const; - SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const; - - std::pair - findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) - const override; - - bool isHvxOperation(SDNode *N) const; - SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; - void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const; - void ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const; - SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - }; + SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxCttz(SDValue Op, 
SelectionDAG &DAG) const; + SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const; + + SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const; + SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const; + SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const; + + std::pair + findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) + const override; + + bool isHvxOperation(SDNode *N) const; + SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; + void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const; + void ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const; + SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; +}; } // end namespace llvm From 5e04b539c835cdc810cf0f75e87ca0c9b81d00e4 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 4 Sep 2020 15:48:27 -0700 Subject: [PATCH 235/465] [compiler-rt] Try again to correct test after 3f1a9b7eca0 added segment names to objdump output One check was missed on the previous attempt --- compiler-rt/test/asan/TestCases/Darwin/cstring_section.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c b/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c index 958d7777111c4..d72b0ba8a8bb3 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c +++ b/compiler-rt/test/asan/TestCases/Darwin/cstring_section.c @@ -4,7 +4,7 @@ // RUN: llvm-objdump -s %t | FileCheck %s // Check that "Hello.\n" is in __asan_cstring and not in __cstring. 
-// CHECK: Contents of section __asan_cstring: +// CHECK: Contents of section {{.*}}__asan_cstring: // CHECK: 48656c6c {{.*}} Hello. // CHECK: Contents of section {{.*}}__const: // CHECK-NOT: 48656c6c {{.*}} Hello. From a5d6af421d625c78bfb0f63830b51863ff0f0877 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 4 Sep 2020 16:05:20 -0700 Subject: [PATCH 236/465] [compiler-rt] Add .clang-tidy with customization to disable readability-identifier-naming Copied from lldb/.clang-tidy (D75810). Most compiler-rt code actually uses variableName or variable_name but not VariableName. Lots of functions use `__function_name` and FunctionName instead of functionName. Just exclude readability-identifier-naming. --- compiler-rt/.clang-tidy | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 compiler-rt/.clang-tidy diff --git a/compiler-rt/.clang-tidy b/compiler-rt/.clang-tidy new file mode 100644 index 0000000000000..e949902171e7f --- /dev/null +++ b/compiler-rt/.clang-tidy @@ -0,0 +1,2 @@ +# Checks enabled in the top-level .clang-tidy minus readability-identifier-naming +Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes' From 2d652949be4b772f2c11577621b0ad33052ac844 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Fri, 4 Sep 2020 21:44:37 -0400 Subject: [PATCH 237/465] [PowerPC] Provide vec_cmpne on pre-Power9 architectures in altivec.h These overloads are listed in appendix A of the ELFv2 ABI specification without a requirement for ISA 3.0. So these need to be available on all Altivec-capable architectures. The implementation in altivec.h erroneously had them guarded for Power9 due to the availability of the VCMPNE[BHW] instructions. However these need to be implemented in terms of the VCMPEQ[BHW] instructions on older architectures. 
Fixes: https://bugs.llvm.org/show_bug.cgi?id=47423 --- clang/lib/Headers/altivec.h | 104 +++++++++++++++++----- clang/test/CodeGen/builtins-ppc-altivec.c | 79 ++++++++++++++++ 2 files changed, 159 insertions(+), 24 deletions(-) diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 9fda383074f6b..a7c4fd23ef19b 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -1766,36 +1766,12 @@ vec_cmpne(vector unsigned int __a, vector unsigned int __b) { (vector int)__b); } -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector bool long long __a, vector bool long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector signed long long __a, vector signed long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector float __a, vector float __b) { return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, (vector int)__b); } -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector double __a, vector double __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - /* vec_cmpnez */ static __inline__ vector bool char __ATTRS_o_ai @@ -1900,6 +1876,86 @@ vec_parity_lsbb(vector signed long long __a) { return __builtin_altivec_vprtybd(__a); } +#else +/* vec_cmpne */ + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector bool char __a, vector bool char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static 
__inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector signed char __a, vector signed char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector unsigned char __a, vector unsigned char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector bool short __a, vector bool short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector signed short __a, vector signed short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector unsigned short __a, vector unsigned short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector bool int __a, vector bool int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector signed int __a, vector signed int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector unsigned int __a, vector unsigned int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector float __a, vector float __b) { + return ~(vec_cmpeq(__a, __b)); +} +#endif + +#ifdef __POWER8_VECTOR__ +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector bool long long __a, vector bool long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector signed long long __a, vector signed long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { + return (vector bool long long) + 
~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} +#endif + +#ifdef __VSX__ +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector double __a, vector double __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} #endif /* vec_cmpgt */ diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/builtins-ppc-altivec.c index 1212f594ee8cb..06f70a9019039 100644 --- a/clang/test/CodeGen/builtins-ppc-altivec.c +++ b/clang/test/CodeGen/builtins-ppc-altivec.c @@ -1036,6 +1036,85 @@ void test2() { // CHECK: @llvm.ppc.altivec.vcmpeqfp // CHECK-LE: @llvm.ppc.altivec.vcmpeqfp + /* vec_cmpne */ + res_vbc = vec_cmpne(vsc, vsc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbc = vec_cmpne(vuc, vuc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbc = vec_cmpne(vbc, vbc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbc = vec_cmpne(vbc, vbc); +// CHECK: @llvm.ppc.altivec.vcmpequb +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequb +// CHECK-LE: xor + + res_vbs = vec_cmpne(vs, vs); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbs = vec_cmpne(vus, vus); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbs = vec_cmpne(vbs, vbs); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbs = vec_cmpne(vbs, vbs); +// CHECK: @llvm.ppc.altivec.vcmpequh +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequh +// CHECK-LE: xor + + res_vbi = vec_cmpne(vi, vi); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: 
@llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vui, vui); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vbi, vbi); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vbi, vbi); +// CHECK: @llvm.ppc.altivec.vcmpequw +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpequw +// CHECK-LE: xor + + res_vbi = vec_cmpne(vf, vf); +// CHECK: @llvm.ppc.altivec.vcmpeqfp +// CHECK: xor +// CHECK-LE: @llvm.ppc.altivec.vcmpeqfp +// CHECK-LE: xor + /* vec_cmpge */ res_vbc = vec_cmpge(vsc, vsc); // CHECK: @llvm.ppc.altivec.vcmpgtsb From 705271d9cd0e981b2df41cf2802880dcd5925281 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Sat, 5 Sep 2020 13:16:20 +0800 Subject: [PATCH 238/465] [PowerPC] Expand constrained ppc_fp128 to i32 conversion Libcall __gcc_qtou is not available, which breaks some tests needing it. On PowerPC, we have code to manually expand the operation, this patch applies it to constrained conversion. To keep it strict-safe, it's using the algorithm similar to expandFP_TO_UINT. For constrained operations marking FP exception behavior as 'ignore', we should set the NoFPExcept flag. However, in some custom lowering the flag is missed. This should be fixed by future patches. 
Reviewed By: uweigand Differential Revision: https://reviews.llvm.org/D86605 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 94 +++++++-- llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 + llvm/lib/Target/PowerPC/PPCInstrInfo.td | 9 +- .../CodeGen/PowerPC/fp-strict-conv-f128.ll | 180 +++++++++++++----- .../ppcf128-constrained-fp-intrinsics.ll | 149 +++++++++++---- 5 files changed, 336 insertions(+), 99 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 4fe29f7f29944..b213abb57aa83 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -262,6 +262,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // PPC (the libcall is not available). setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom); // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); @@ -1505,6 +1507,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; + case PPCISD::STRICT_FADDRTZ: + return "PPCISD::STRICT_FADDRTZ"; case PPCISD::STRICT_FCTIDZ: return "PPCISD::STRICT_FCTIDZ"; case PPCISD::STRICT_FCTIWZ: @@ -8164,38 +8168,86 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || Op.getOpcode() == ISD::STRICT_FP_TO_SINT; SDValue Src = Op.getOperand(IsStrict ? 1 : 0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + // FP to INT conversions are legal for f128. 
- if (Src.getValueType() == MVT::f128) + if (SrcVT == MVT::f128) return Op; // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). - if (Src.getValueType() == MVT::ppcf128 && !IsStrict) { - if (Op.getValueType() == MVT::i32) { + if (SrcVT == MVT::ppcf128) { + if (DstVT == MVT::i32) { + // TODO: Conservatively pass only nofpexcept flag here. Need to check and + // set other fast-math flags to FP operations in both strict and + // non-strict cases. (FP_TO_SINT, FSUB) + SDNodeFlags Flags; + Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); + if (IsSigned) { SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(0, dl)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(1, dl)); - // Add the two halves of the long double in round-to-zero mode. - SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); - - // Now use a smaller FP_TO_SINT. - return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + // Add the two halves of the long double in round-to-zero mode, and use + // a smaller FP_TO_SINT. + if (IsStrict) { + SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl, + DAG.getVTList(MVT::f64, MVT::Other), + {Op.getOperand(0), Lo, Hi}, Flags); + return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, + DAG.getVTList(MVT::i32, MVT::Other), + {Res.getValue(1), Res}, Flags); + } else { + SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + } } else { const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); - SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); - // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X - // FIXME: generated code sucks. - // TODO: Are there fast-math-flags to propagate to this FSUB? 
- SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Tmp); - True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); - True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, - DAG.getConstant(0x80000000, dl, MVT::i32)); - SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); - return DAG.getSelectCC(dl, Src, Tmp, True, False, ISD::SETGE); + SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); + SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT); + if (IsStrict) { + // Sel = Src < 0x80000000 + // FltOfs = select Sel, 0.0, 0x80000000 + // IntOfs = select Sel, 0, 0x80000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs + SDValue Chain = Op.getOperand(0); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + EVT DstSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); + SDValue Sel = + DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Chain, true); + Chain = Sel.getValue(1); + + SDValue FltOfs = DAG.getSelect( + dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); + + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, + DAG.getVTList(SrcVT, MVT::Other), + {Chain, Src, FltOfs}, Flags); + Chain = Val.getValue(1); + SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, + DAG.getVTList(DstVT, MVT::Other), + {Chain, Val}, Flags); + Chain = SInt.getValue(1); + SDValue IntOfs = DAG.getSelect( + dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask); + SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); + return DAG.getMergeValues({Result, Chain}, dl); + } else { + // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X + // FIXME: generated code sucks. 
+ SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst); + True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); + True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask); + SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); + return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE); + } } } @@ -12170,7 +12222,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(PPC::RM, RegState::ImplicitDefine); // Perform addition. - BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); + auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest) + .addReg(Src1) + .addReg(Src2); + if (MI.getFlag(MachineInstr::NoFPExcept)) + MIB.setMIFlag(MachineInstr::NoFPExcept); // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index f51cd2823fcdd..05c9a5d314133 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -453,6 +453,9 @@ namespace llvm { STRICT_FCFIDS, STRICT_FCFIDUS, + /// Constrained floating point add in round-to-zero mode. + STRICT_FADDRTZ, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 04ecb72a5ccd5..a6932005d5ad1 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -182,7 +182,12 @@ def PPCmffs : SDNode<"PPCISD::MFFS", // Perform FADD in round-to-zero mode. 
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; +def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp, + [SDNPHasChain]>; +def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs), + [(PPCfaddrtz node:$lhs, node:$rhs), + (PPCstrict_faddrtz node:$lhs, node:$rhs)]>; def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. @@ -2960,9 +2965,9 @@ def : InstAlias<"mtcr $rA", (MTCRF 255, gprc:$rA)>; let Predicates = [HasFPU] in { // Custom inserter instruction to perform FADD in round-to-zero mode. -let Uses = [RM] in { +let Uses = [RM], mayRaiseFPException = 1 in { def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", - [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; + [(set f64:$FRT, (PPCany_faddrtz f64:$FRA, f64:$FRB))]>; } // The above pseudo gets expanded to make use of the following instructions diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll index d8ef98c149f6a..b4927f3da0637 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -403,47 +403,39 @@ entry: define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 { ; P8-LABEL: ppcq_to_i32: ; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: .cfi_def_cfa_offset 112 -; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: bl __gcc_qtou -; P8-NEXT: nop +; P8-NEXT: mffs f0 +; P8-NEXT: mtfsb1 31 +; P8-NEXT: mtfsb0 30 +; P8-NEXT: fadd f1, f2, f1 +; P8-NEXT: mtfsf 1, f0 +; P8-NEXT: xscvdpsxws f0, f1 +; P8-NEXT: mffprwz r3, f0 ; P8-NEXT: extsw r3, r3 -; P8-NEXT: addi r1, r1, 112 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: ppcq_to_i32: ; P9: # %bb.0: # %entry -; P9-NEXT: mflr r0 -; P9-NEXT: std r0, 16(r1) -; P9-NEXT: stdu r1, -32(r1) -; P9-NEXT: .cfi_def_cfa_offset 32 -; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: bl __gcc_qtou -; P9-NEXT: nop +; P9-NEXT: mffs f0 
+; P9-NEXT: mtfsb1 31 +; P9-NEXT: mtfsb0 30 +; P9-NEXT: fadd f1, f2, f1 +; P9-NEXT: mtfsf 1, f0 +; P9-NEXT: xscvdpsxws f0, f1 +; P9-NEXT: mffprwz r3, f0 ; P9-NEXT: extsw r3, r3 -; P9-NEXT: addi r1, r1, 32 -; P9-NEXT: ld r0, 16(r1) -; P9-NEXT: mtlr r0 ; P9-NEXT: blr ; ; NOVSX-LABEL: ppcq_to_i32: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: mflr r0 -; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -32(r1) -; NOVSX-NEXT: .cfi_def_cfa_offset 32 -; NOVSX-NEXT: .cfi_offset lr, 16 -; NOVSX-NEXT: bl __gcc_qtou -; NOVSX-NEXT: nop -; NOVSX-NEXT: extsw r3, r3 -; NOVSX-NEXT: addi r1, r1, 32 -; NOVSX-NEXT: ld r0, 16(r1) -; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: mffs f0 +; NOVSX-NEXT: mtfsb1 31 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: mtfsb0 30 +; NOVSX-NEXT: fadd f1, f2, f1 +; NOVSX-NEXT: mtfsf 1, f0 +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwa r3, -4(r1) ; NOVSX-NEXT: blr entry: %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 @@ -549,12 +541,40 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 { ; P8: # %bb.0: # %entry ; P8-NEXT: mflr r0 ; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: stdu r1, -128(r1) +; P8-NEXT: .cfi_def_cfa_offset 128 ; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: bl __fixunstfsi +; P8-NEXT: .cfi_offset r30, -16 +; P8-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; P8-NEXT: xxlxor f3, f3, f3 +; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; P8-NEXT: lfs f0, .LCPI11_0@toc@l(r3) +; P8-NEXT: fcmpo cr0, f2, f3 +; P8-NEXT: lis r3, -32768 +; P8-NEXT: xxlxor f3, f3, f3 +; P8-NEXT: fcmpo cr1, f1, f0 +; P8-NEXT: crand 4*cr5+lt, 4*cr1+eq, lt +; P8-NEXT: crandc 4*cr5+gt, 4*cr1+lt, 4*cr1+eq +; P8-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; P8-NEXT: isel r30, 0, r3, 4*cr5+lt +; P8-NEXT: bc 12, 4*cr5+lt, .LBB11_2 +; P8-NEXT: # %bb.1: # %entry +; P8-NEXT: fmr f3, f0 +; P8-NEXT: .LBB11_2: # %entry +; P8-NEXT: xxlxor f4, 
f4, f4 +; P8-NEXT: bl __gcc_qsub ; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: mffs f0 +; P8-NEXT: mtfsb1 31 +; P8-NEXT: mtfsb0 30 +; P8-NEXT: fadd f1, f2, f1 +; P8-NEXT: mtfsf 1, f0 +; P8-NEXT: xscvdpsxws f0, f1 +; P8-NEXT: mffprwz r3, f0 +; P8-NEXT: xor r3, r3, r30 +; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; P8-NEXT: clrldi r3, r3, 32 +; P8-NEXT: addi r1, r1, 128 ; P8-NEXT: ld r0, 16(r1) ; P8-NEXT: mtlr r0 ; P8-NEXT: blr @@ -562,28 +582,88 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 { ; P9-LABEL: ppcq_to_u32: ; P9: # %bb.0: # %entry ; P9-NEXT: mflr r0 -; P9-NEXT: std r0, 16(r1) -; P9-NEXT: stdu r1, -32(r1) -; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_def_cfa_offset 48 ; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: bl __fixunstfsi +; P9-NEXT: .cfi_offset r30, -16 +; P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -48(r1) +; P9-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; P9-NEXT: xxlxor f3, f3, f3 +; P9-NEXT: lfs f0, .LCPI11_0@toc@l(r3) +; P9-NEXT: fcmpo cr1, f2, f3 +; P9-NEXT: lis r3, -32768 +; P9-NEXT: fcmpo cr0, f1, f0 +; P9-NEXT: xxlxor f3, f3, f3 +; P9-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt +; P9-NEXT: crandc 4*cr5+gt, lt, eq +; P9-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; P9-NEXT: isel r30, 0, r3, 4*cr5+lt +; P9-NEXT: bc 12, 4*cr5+lt, .LBB11_2 +; P9-NEXT: # %bb.1: # %entry +; P9-NEXT: fmr f3, f0 +; P9-NEXT: .LBB11_2: # %entry +; P9-NEXT: xxlxor f4, f4, f4 +; P9-NEXT: bl __gcc_qsub ; P9-NEXT: nop -; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: mffs f0 +; P9-NEXT: mtfsb1 31 +; P9-NEXT: mtfsb0 30 +; P9-NEXT: fadd f1, f2, f1 +; P9-NEXT: mtfsf 1, f0 +; P9-NEXT: xscvdpsxws f0, f1 +; P9-NEXT: mffprwz r3, f0 +; P9-NEXT: xor r3, r3, r30 +; P9-NEXT: clrldi r3, r3, 32 +; P9-NEXT: addi r1, r1, 48 ; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; P9-NEXT: mtlr r0 ; P9-NEXT: blr ; ; NOVSX-LABEL: ppcq_to_u32: ; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mfocrf r12, 32 ; NOVSX-NEXT: mflr 
r0 ; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -32(r1) -; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: stw r12, 8(r1) +; NOVSX-NEXT: stdu r1, -48(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 48 ; NOVSX-NEXT: .cfi_offset lr, 16 -; NOVSX-NEXT: bl __fixunstfsi +; NOVSX-NEXT: .cfi_offset cr2, 8 +; NOVSX-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; NOVSX-NEXT: addis r4, r2, .LCPI11_1@toc@ha +; NOVSX-NEXT: lfs f0, .LCPI11_0@toc@l(r3) +; NOVSX-NEXT: lfs f4, .LCPI11_1@toc@l(r4) +; NOVSX-NEXT: fcmpo cr0, f1, f0 +; NOVSX-NEXT: fcmpo cr1, f2, f4 +; NOVSX-NEXT: fmr f3, f4 +; NOVSX-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt +; NOVSX-NEXT: crandc 4*cr5+gt, lt, eq +; NOVSX-NEXT: cror 4*cr2+lt, 4*cr5+gt, 4*cr5+lt +; NOVSX-NEXT: bc 12, 4*cr2+lt, .LBB11_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr f3, f0 +; NOVSX-NEXT: .LBB11_2: # %entry +; NOVSX-NEXT: bl __gcc_qsub ; NOVSX-NEXT: nop -; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: mffs f0 +; NOVSX-NEXT: mtfsb1 31 +; NOVSX-NEXT: addi r3, r1, 44 +; NOVSX-NEXT: mtfsb0 30 +; NOVSX-NEXT: fadd f1, f2, f1 +; NOVSX-NEXT: mtfsf 1, f0 +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lis r3, -32768 +; NOVSX-NEXT: lwz r4, 44(r1) +; NOVSX-NEXT: isel r3, 0, r3, 4*cr2+lt +; NOVSX-NEXT: xor r3, r4, r3 +; NOVSX-NEXT: clrldi r3, r3, 32 +; NOVSX-NEXT: addi r1, r1, 48 ; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: lwz r12, 8(r1) +; NOVSX-NEXT: mtocrf 32, r12 ; NOVSX-NEXT: mtlr r0 ; NOVSX-NEXT: blr entry: @@ -747,12 +827,17 @@ entry: ret fp128 %conv } -define void @fptoint_nofpexcept(fp128 %m, i32* %addr1, i64* %addr2) { +define void @fptoint_nofpexcept(ppc_fp128 %p, fp128 %m, i32* %addr1, i64* %addr2) { ; MIR-LABEL: name: fptoint_nofpexcept ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPSWZ ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPUWZ ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPSDZ ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPUDZ +; +; MIR: renamable $f{{[0-9]+}} = nofpexcept FADD +; MIR: renamable 
$f{{[0-9]+}} = XSCVDPSXWS +; MIR: renamable $f{{[0-9]+}} = nofpexcept FADD +; MIR: renamable $f{{[0-9]+}} = XSCVDPSXWS entry: %conv1 = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.ignore") #0 store volatile i32 %conv1, i32* %addr1, align 4 @@ -762,6 +847,11 @@ entry: store volatile i64 %conv3, i64* %addr2, align 8 %conv4 = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.ignore") #0 store volatile i64 %conv4, i64* %addr2, align 8 + + %conv5 = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %p, metadata !"fpexcept.ignore") #0 + store volatile i32 %conv5, i32* %addr1, align 4 + %conv6 = tail call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128 %p, metadata !"fpexcept.ignore") #0 + store volatile i32 %conv6, i32* %addr1, align 4 ret void } diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index 72c6a137b9afa..5ab12093954f8 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1202,38 +1202,36 @@ entry: define i32 @test_fptosi_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 { ; PC64LE-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: bl __gcc_qtou -; PC64LE-NEXT: nop -; PC64LE-NEXT: addi 1, 1, 32 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: mffs 0 +; PC64LE-NEXT: mtfsb1 31 +; PC64LE-NEXT: mtfsb0 30 +; PC64LE-NEXT: fadd 1, 2, 1 +; PC64LE-NEXT: mtfsf 1, 0 +; PC64LE-NEXT: xscvdpsxws 0, 1 +; PC64LE-NEXT: mffprwz 3, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: bl __gcc_qtou -; 
PC64LE9-NEXT: nop -; PC64LE9-NEXT: addi 1, 1, 32 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: mffs 0 +; PC64LE9-NEXT: mtfsb1 31 +; PC64LE9-NEXT: mtfsb0 30 +; PC64LE9-NEXT: fadd 1, 2, 1 +; PC64LE9-NEXT: mtfsf 1, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: blr ; ; PC64-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64: # %bb.0: # %entry -; PC64-NEXT: mflr 0 -; PC64-NEXT: std 0, 16(1) -; PC64-NEXT: stdu 1, -112(1) -; PC64-NEXT: bl __gcc_qtou -; PC64-NEXT: nop -; PC64-NEXT: addi 1, 1, 112 -; PC64-NEXT: ld 0, 16(1) -; PC64-NEXT: mtlr 0 +; PC64-NEXT: mffs 0 +; PC64-NEXT: mtfsb1 31 +; PC64-NEXT: mtfsb0 30 +; PC64-NEXT: fadd 1, 2, 1 +; PC64-NEXT: mtfsf 1, 0 +; PC64-NEXT: fctiwz 0, 1 +; PC64-NEXT: stfd 0, -8(1) +; PC64-NEXT: lwz 3, -4(1) ; PC64-NEXT: blr entry: %fpext = call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128( @@ -1289,24 +1287,76 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 { ; PC64LE-LABEL: test_fptoui_ppc_i32_ppc_fp128: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 30, -16(1) # 8-byte Folded Spill ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: bl __fixunstfsi +; PC64LE-NEXT: stdu 1, -48(1) +; PC64LE-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64LE-NEXT: xxlxor 3, 3, 3 +; PC64LE-NEXT: lfs 0, .LCPI31_0@toc@l(3) +; PC64LE-NEXT: fcmpo 0, 2, 3 +; PC64LE-NEXT: lis 3, -32768 +; PC64LE-NEXT: xxlxor 3, 3, 3 +; PC64LE-NEXT: fcmpo 1, 1, 0 +; PC64LE-NEXT: crand 20, 6, 0 +; PC64LE-NEXT: crandc 21, 4, 6 +; PC64LE-NEXT: cror 20, 21, 20 +; PC64LE-NEXT: isel 30, 0, 3, 20 +; PC64LE-NEXT: bc 12, 20, .LBB31_2 +; PC64LE-NEXT: # %bb.1: # %entry +; PC64LE-NEXT: fmr 3, 0 +; PC64LE-NEXT: .LBB31_2: # %entry +; PC64LE-NEXT: xxlxor 4, 4, 4 +; PC64LE-NEXT: bl __gcc_qsub ; PC64LE-NEXT: nop -; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: mffs 0 +; PC64LE-NEXT: mtfsb1 31 +; PC64LE-NEXT: mtfsb0 30 +; PC64LE-NEXT: fadd 1, 2, 1 +; PC64LE-NEXT: mtfsf 1, 0 +; 
PC64LE-NEXT: xscvdpsxws 0, 1 +; PC64LE-NEXT: mffprwz 3, 0 +; PC64LE-NEXT: xor 3, 3, 30 +; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: test_fptoui_ppc_i32_ppc_fp128: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 30, -16(1) # 8-byte Folded Spill ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: bl __fixunstfsi +; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64LE9-NEXT: xxlxor 3, 3, 3 +; PC64LE9-NEXT: lfs 0, .LCPI31_0@toc@l(3) +; PC64LE9-NEXT: fcmpo 1, 2, 3 +; PC64LE9-NEXT: lis 3, -32768 +; PC64LE9-NEXT: fcmpo 0, 1, 0 +; PC64LE9-NEXT: xxlxor 3, 3, 3 +; PC64LE9-NEXT: crand 20, 2, 4 +; PC64LE9-NEXT: crandc 21, 0, 2 +; PC64LE9-NEXT: cror 20, 21, 20 +; PC64LE9-NEXT: isel 30, 0, 3, 20 +; PC64LE9-NEXT: bc 12, 20, .LBB31_2 +; PC64LE9-NEXT: # %bb.1: # %entry +; PC64LE9-NEXT: fmr 3, 0 +; PC64LE9-NEXT: .LBB31_2: # %entry +; PC64LE9-NEXT: xxlxor 4, 4, 4 +; PC64LE9-NEXT: bl __gcc_qsub ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: mffs 0 +; PC64LE9-NEXT: mtfsb1 31 +; PC64LE9-NEXT: mtfsb0 30 +; PC64LE9-NEXT: fadd 1, 2, 1 +; PC64LE9-NEXT: mtfsf 1, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: mffprwz 3, 0 +; PC64LE9-NEXT: xor 3, 3, 30 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr ; @@ -1314,12 +1364,45 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 { ; PC64: # %bb.0: # %entry ; PC64-NEXT: mflr 0 ; PC64-NEXT: std 0, 16(1) -; PC64-NEXT: stdu 1, -112(1) -; PC64-NEXT: bl __fixunstfsi +; PC64-NEXT: mfcr 12 +; PC64-NEXT: stw 12, 8(1) +; PC64-NEXT: stdu 1, -128(1) +; PC64-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64-NEXT: lfs 0, .LCPI31_0@toc@l(3) +; PC64-NEXT: addis 3, 2, .LCPI31_1@toc@ha +; PC64-NEXT: lfs 4, .LCPI31_1@toc@l(3) 
+; PC64-NEXT: fcmpo 0, 1, 0 +; PC64-NEXT: crandc 21, 0, 2 +; PC64-NEXT: fcmpo 1, 2, 4 +; PC64-NEXT: crand 20, 2, 4 +; PC64-NEXT: cror 8, 21, 20 +; PC64-NEXT: fmr 3, 4 +; PC64-NEXT: bc 12, 8, .LBB31_2 +; PC64-NEXT: # %bb.1: # %entry +; PC64-NEXT: fmr 3, 0 +; PC64-NEXT: .LBB31_2: # %entry +; PC64-NEXT: bl __gcc_qsub ; PC64-NEXT: nop -; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: mffs 0 +; PC64-NEXT: mtfsb1 31 +; PC64-NEXT: lis 4, -32768 +; PC64-NEXT: bc 12, 8, .LBB31_3 +; PC64-NEXT: b .LBB31_4 +; PC64-NEXT: .LBB31_3: # %entry +; PC64-NEXT: li 4, 0 +; PC64-NEXT: .LBB31_4: # %entry +; PC64-NEXT: mtfsb0 30 +; PC64-NEXT: fadd 1, 2, 1 +; PC64-NEXT: mtfsf 1, 0 +; PC64-NEXT: fctiwz 0, 1 +; PC64-NEXT: stfd 0, 120(1) +; PC64-NEXT: lwz 3, 124(1) +; PC64-NEXT: xor 3, 3, 4 +; PC64-NEXT: addi 1, 1, 128 ; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: lwz 12, 8(1) ; PC64-NEXT: mtlr 0 +; PC64-NEXT: mtcrf 32, 12 # cr2 ; PC64-NEXT: blr entry: %fpext = call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128( From 3a6f3fc16039293920bfd6982052fd5fc14529e4 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 4 Sep 2020 15:36:48 +0200 Subject: [PATCH 239/465] Fix return status of SimplifyCFG When a switch case is folded into default's case, that's an IR change that should be reported, update ConstantFoldTerminator accordingly. 
Differential Revision: https://reviews.llvm.org/D87142 --- llvm/lib/Transforms/Utils/Local.cpp | 5 ++- .../Transforms/SimplifyCFG/merge-default.ll | 45 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/merge-default.ll diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 2b0ae722458b3..41349457e2b95 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -182,6 +182,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, TheOnlyDest = SI->case_begin()->getCaseSuccessor(); } + bool Changed = false; + // Figure out which case it goes to. for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) { // Found case matching a constant operand? @@ -220,6 +222,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, DefaultDest->removePredecessor(ParentBB); i = SI->removeCase(i); e = SI->case_end(); + Changed = true; if (DTU) DTU->applyUpdatesPermissive( {{DominatorTree::Delete, ParentBB, DefaultDest}}); @@ -308,7 +311,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SI->eraseFromParent(); return true; } - return false; + return Changed; } if (auto *IBI = dyn_cast(T)) { diff --git a/llvm/test/Transforms/SimplifyCFG/merge-default.ll b/llvm/test/Transforms/SimplifyCFG/merge-default.ll new file mode 100644 index 0000000000000..93b64d708807a --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-default.ll @@ -0,0 +1,45 @@ +; RUN: opt -simplifycfg -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @g() +declare void @f() + +define void @foo(i32 %Kind) { +; CHECK-LABEL: @foo( +; CHECK-NEXT:entry: +; CHECK-NEXT: switch i32 %Kind, label %sw.epilog [ +; CHECK-NEXT: i32 15, label %sw.bb2 +; CHECK-NEXT: i32 2, label %sw.bb 
+; CHECK-NEXT: ] +; CHECK: sw.bb: +; CHECK-NEXT: call void @g() +; CHECK-NEXT: call void @g() +; CHECK-NEXT: br label %sw.epilog +; CHECK: sw.bb2: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: br label %sw.epilog +; CHECK: sw.epilog: +; CHECK-NEXT: ret void +; CHECK-NEXT:} + +entry: + switch i32 %Kind, label %sw.epilog [ + i32 1, label %sw.epilog + i32 2, label %sw.bb + i32 15, label %sw.bb2 + ] + +sw.bb: + call void @g() + call void @g() + br label %sw.epilog + +sw.bb2: + call void @f() + br label %sw.epilog + +sw.epilog: + ret void +} From 39caf9e94091629c65038b90215e24eea47a5ce5 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 10:27:55 +0200 Subject: [PATCH 240/465] [SCCP] Add tests for intrinsic ranges (NFC) --- llvm/test/Transforms/SCCP/intrinsics.ll | 106 ++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 llvm/test/Transforms/SCCP/intrinsics.ll diff --git a/llvm/test/Transforms/SCCP/intrinsics.ll b/llvm/test/Transforms/SCCP/intrinsics.ll new file mode 100644 index 0000000000000..d06b94162b5be --- /dev/null +++ b/llvm/test/Transforms/SCCP/intrinsics.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -ipsccp -S %s | FileCheck %s + +declare i8 @llvm.abs.i8(i8, i1) +declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1) +declare i8 @llvm.umax.i8(i8, i8) + +declare void @use(i1) +declare void @use_vec(<2 x i1>) + +define void @abs1(i8* %p) { +; CHECK-LABEL: @abs1( +; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[P:%.*]], align 1, [[RNG0:!range !.*]] +; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X]], i1 false) +; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i8 [[ABS]], 0 +; CHECK-NEXT: call void @use(i1 [[CMP1]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i8 [[ABS]], 10 +; CHECK-NEXT: call void @use(i1 [[CMP2]]) +; CHECK-NEXT: [[CMP3:%.*]] = icmp sge i8 [[ABS]], 1 +; CHECK-NEXT: call void @use(i1 [[CMP3]]) +; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i8 [[ABS]], 9 +; CHECK-NEXT: call void 
@use(i1 [[CMP4]]) +; CHECK-NEXT: ret void +; + %x = load i8, i8* %p, !range !{i8 -9, i8 10} + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp1 = icmp sge i8 %abs, 0 + call void @use(i1 %cmp1) + %cmp2 = icmp slt i8 %abs, 10 + call void @use(i1 %cmp2) + %cmp3 = icmp sge i8 %abs, 1 + call void @use(i1 %cmp3) + %cmp4 = icmp slt i8 %abs, 9 + call void @use(i1 %cmp4) + ret void +} + +; Even if we don't know anything about the input range of the operand, +; we still know something about the result range of abs(). +define void @abs2(i8 %x) { +; CHECK-LABEL: @abs2( +; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 true) +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[ABS]], 0 +; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: ret void +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 true) + %cmp = icmp sge i8 %abs, 0 + call void @use(i1 %cmp) + ret void +} + +define void @abs2_vec(<2 x i8> %x) { +; CHECK-LABEL: @abs2_vec( +; CHECK-NEXT: [[ABS:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[X:%.*]], i1 true) +; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i8> [[ABS]], zeroinitializer +; CHECK-NEXT: call void @use_vec(<2 x i1> [[CMP]]) +; CHECK-NEXT: ret void +; + %abs = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %x, i1 true) + %cmp = icmp sge <2 x i8> %abs, zeroinitializer + call void @use_vec(<2 x i1> %cmp) + ret void +} + +define void @umax1(i8* %p1, i8* %p2) { +; CHECK-LABEL: @umax1( +; CHECK-NEXT: [[X1:%.*]] = load i8, i8* [[P1:%.*]], align 1, [[RNG1:!range !.*]] +; CHECK-NEXT: [[X2:%.*]] = load i8, i8* [[P2:%.*]], align 1, [[RNG2:!range !.*]] +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X1]], i8 [[X2]]) +; CHECK-NEXT: [[CMP1:%.*]] = icmp uge i8 [[M]], 5 +; CHECK-NEXT: call void @use(i1 [[CMP1]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[M]], 15 +; CHECK-NEXT: call void @use(i1 [[CMP2]]) +; CHECK-NEXT: [[CMP3:%.*]] = icmp uge i8 [[M]], 6 +; CHECK-NEXT: call void @use(i1 [[CMP3]]) +; CHECK-NEXT: [[CMP4:%.*]] = icmp ult i8 [[M]], 14 +; CHECK-NEXT: call 
void @use(i1 [[CMP4]]) +; CHECK-NEXT: ret void +; + %x1 = load i8, i8* %p1, !range !{i8 0, i8 10} + %x2 = load i8, i8* %p2, !range !{i8 5, i8 15} + %m = call i8 @llvm.umax.i8(i8 %x1, i8 %x2) + %cmp1 = icmp uge i8 %m, 5 + call void @use(i1 %cmp1) + %cmp2 = icmp ult i8 %m, 15 + call void @use(i1 %cmp2) + %cmp3 = icmp uge i8 %m, 6 + call void @use(i1 %cmp3) + %cmp4 = icmp ult i8 %m, 14 + call void @use(i1 %cmp4) + ret void +} + +define void @umax2(i8 %x) { +; CHECK-LABEL: @umax2( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 10) +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[M]], 10 +; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: ret void +; + %m = call i8 @llvm.umax.i8(i8 %x, i8 10) + %cmp = icmp uge i8 %m, 10 + call void @use(i1 %cmp) + ret void +} From 714ceefad9b96ab3ef20913f2110883a1ad34a13 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Mon, 31 Aug 2020 13:26:36 +0200 Subject: [PATCH 241/465] [SelectionDAG] Always intersect SDNode flags during getNode() node memoization. Previously SDNodeFlags::instersectWith(Flags) would do nothing if Flags was in an undefined state, which is very bad given that this is the default when getNode() is called without passing an explicit SDNodeFlags argument. This meant that if an already existing and reused node had a flag which the second caller to getNode() did not set, that flag would remain uncleared. This was exposed by https://bugs.llvm.org/show_bug.cgi?id=47092, where an NSW flag was incorrectly set on an add instruction (which did in fact overflow in one of the two original contexts), so when SystemZElimCompare removed the compare with 0 trusting that flag, wrong-code resulted. 
There is more that needs to be done in this area as discussed here: Differential Revision: https://reviews.llvm.org/D86871 Review: Ulrich Weigand, Sanjay Patel --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 11 +++---- .../SelectionDAG/SelectionDAGBuilder.cpp | 2 ++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 4 +-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 4 +-- llvm/test/CodeGen/SystemZ/fp-mul-14.ll | 20 +++++++++++++ llvm/test/CodeGen/SystemZ/int-cmp-60.ll | 29 +++++++++++++++++++ 6 files changed, 59 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/fp-mul-14.ll create mode 100644 llvm/test/CodeGen/SystemZ/int-cmp-60.ll diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index cde075f41f739..6eef79162f8a7 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -357,9 +357,8 @@ template<> struct simplify_type { /// the backend. struct SDNodeFlags { private: - // This bit is used to determine if the flags are in a defined state. - // Flag bits can only be masked out during intersection if the masking flags - // are defined. + // This bit is used to determine if the flags are in a defined state. It is + // only used by SelectionDAGBuilder. bool AnyDefined : 1; bool NoUnsignedWrap : 1; @@ -464,11 +463,9 @@ struct SDNodeFlags { bool hasAllowReassociation() const { return AllowReassociation; } bool hasNoFPExcept() const { return NoFPExcept; } - /// Clear any flags in this flag set that aren't also set in Flags. - /// If the given Flags are undefined then don't do anything. + /// Clear any flags in this flag set that aren't also set in Flags. All + /// flags will be cleared if Flags are undefined. 
void intersectWith(const SDNodeFlags Flags) { - if (!Flags.isDefined()) - return; NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1a2c77974c2b9..5e6cb03f3839c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1128,6 +1128,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) { // TODO: We could handle all flags (nsw, etc) here. // TODO: If an IR instruction maps to >1 node, only the final node will have // flags set. + // TODO: The handling of flags should be improved, see + // https://reviews.llvm.org/D86871 if (SDNode *Node = getNodeForIRValue(&I)) { SDNodeFlags IncomingFlags; IncomingFlags.copyFMF(*FPMO); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 151b1bdd55381..5dd42d1f4a6a3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -521,8 +521,8 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { return true; // TODO: Move into isKnownNeverNaN - if (N->getFlags().isDefined()) - return N->getFlags().hasNoNaNs(); + if (N->getFlags().hasNoNaNs()) + return true; return CurDAG->isKnownNeverNaN(N); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index c7fdc79c3b1a0..932a05a4ba8c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -150,8 +150,8 @@ class AMDGPUTargetLowering : public TargetLowering { return true; const auto Flags = Op.getNode()->getFlags(); - if (Flags.isDefined()) - return Flags.hasNoSignedZeros(); + if (Flags.hasNoSignedZeros()) + return true; return false; } diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-14.ll 
b/llvm/test/CodeGen/SystemZ/fp-mul-14.ll new file mode 100644 index 0000000000000..8bab2135739c4 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fp-mul-14.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; +; Check that a multiply-and-add results. + +; FIXME: This test is xfailed temporarily +; XFAIL: * + +define void @f1(float %arg, float* %Dst) { +; CHECK-LABEL: f1: +; CHECK: maeb +bb: + %i = fmul contract float %arg, 0xBE6777A5C0000000 + %i4 = fadd contract float %i, 1.000000e+00 + %i5 = fmul contract float %arg, 0xBE6777A5C0000000 + %i6 = fadd contract float %i5, 1.000000e+00 + %i7 = fmul contract float %i4, 2.000000e+00 + store float %i7, float* %Dst + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-60.ll b/llvm/test/CodeGen/SystemZ/int-cmp-60.ll new file mode 100644 index 0000000000000..faae4f9bced23 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-cmp-60.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; +; Test that DAGCombiner properly clears the NUW/NSW flags on the memoized add +; node. 
+ +define void @fun(i64* %Src, i32* %Dst) { +; CHECK-LABEL: fun: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iilf %r0, 1303940520 +; CHECK-NEXT: n %r0, 4(%r2) +; CHECK-NEXT: lr %r1, %r0 +; CHECK-NEXT: afi %r1, 1628135358 +; CHECK-NEXT: locrnhe %r1, %r0 +; CHECK-NEXT: st %r1, 0(%r3) +; CHECK-NEXT: br %r14 +entry: + %0 = load i64, i64* %Src, align 8 + %1 = trunc i64 %0 to i32 + %conv = and i32 %1, 1303940520 + %xor11.i = or i32 %conv, -2147483648 + %xor2.i = add i32 %xor11.i, -519348290 + %cmp.i = icmp slt i32 %xor2.i, 0 + %sub3.i = add nuw nsw i32 %conv, 1628135358 + %cond.i = select i1 %cmp.i, i32 %conv, i32 %sub3.i + store i32 %cond.i, i32* %Dst + ret void +} + From d401e376e464b97cabde1fa9c675630e7f275a1c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 10:56:19 +0200 Subject: [PATCH 242/465] [InstCombine] Test abs with dominating condition (NFC) --- .../Transforms/InstCombine/abs-intrinsic.ll | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index 20ba876c53441..7442ed7be1131 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -126,3 +126,49 @@ define <4 x i32> @abs_of_select_neg_false_val(<4 x i1> %b, <4 x i32> %x) { %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sel, i1 false) ret <4 x i32> %abs } + +define i32 @abs_dom_cond_nopoison(i32 %x) { +; CHECK-LABEL: @abs_dom_cond_nopoison( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: [[A1:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NEXT: ret i32 [[A1]] +; CHECK: false: +; CHECK-NEXT: [[A2:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NEXT: ret i32 [[A2]] +; + %cmp = icmp sge i32 %x, 0 + br i1 %cmp, label %true, label %false + +true: + %a1 = call i32 @llvm.abs.i32(i32 
%x, i1 false) + ret i32 %a1 + +false: + %a2 = call i32 @llvm.abs.i32(i32 %x, i1 false) + ret i32 %a2 +} + +define i32 @abs_dom_cond_poison(i32 %x) { +; CHECK-LABEL: @abs_dom_cond_poison( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: true: +; CHECK-NEXT: [[A1:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 true) +; CHECK-NEXT: ret i32 [[A1]] +; CHECK: false: +; CHECK-NEXT: [[A2:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 true) +; CHECK-NEXT: ret i32 [[A2]] +; + %cmp = icmp sge i32 %x, 0 + br i1 %cmp, label %true, label %false + +true: + %a1 = call i32 @llvm.abs.i32(i32 %x, i1 true) + ret i32 %a1 + +false: + %a2 = call i32 @llvm.abs.i32(i32 %x, i1 true) + ret i32 %a2 +} From 1903a1afd947603bea37c40e249c7caa26087258 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 12:22:42 +0200 Subject: [PATCH 243/465] [InstCombine] Add tests for cttz of abs intrinsic (NFC) --- llvm/test/Transforms/InstCombine/cttz-abs.ll | 45 +++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/cttz-abs.ll b/llvm/test/Transforms/InstCombine/cttz-abs.ll index 3faa8665376cc..8da3668997fa7 100644 --- a/llvm/test/Transforms/InstCombine/cttz-abs.ll +++ b/llvm/test/Transforms/InstCombine/cttz-abs.ll @@ -3,7 +3,7 @@ define i32 @cttz_abs(i32 %x) { ; CHECK-LABEL: @cttz_abs( -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), [[RNG0:!range !.*]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 0 @@ -29,7 +29,7 @@ define i32 @cttz_abs2(i32 %x) { ; CHECK-LABEL: @cttz_abs2( ; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], 0 ; CHECK-NEXT: call void @use_cond(i1 [[C]]) -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 
true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp sgt i32 %x, 0 @@ -44,7 +44,7 @@ define i32 @cttz_abs3(i32 %x) { ; CHECK-LABEL: @cttz_abs3( ; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: call void @use_cond(i1 [[C]]) -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp sgt i32 %x, -1 @@ -57,7 +57,7 @@ define i32 @cttz_abs3(i32 %x) { define i32 @cttz_abs4(i32 %x) { ; CHECK-LABEL: @cttz_abs4( -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 1 @@ -69,7 +69,7 @@ define i32 @cttz_abs4(i32 %x) { define i32 @cttz_nabs(i32 %x) { ; CHECK-LABEL: @cttz_nabs( -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 0 @@ -93,7 +93,7 @@ define <2 x i64> @cttz_nabs_vec(<2 x i64> %x) { define i64 @cttz_abs_64(i64 %x) { ; CHECK-LABEL: @cttz_abs_64( -; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), !range !1 +; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), [[RNG1:!range !.*]] ; CHECK-NEXT: ret i64 [[R]] ; %c = icmp slt i64 %x, 0 @@ -109,7 +109,7 @@ define i32 @cttz_abs_multiuse(i32 %x) { ; CHECK-NEXT: [[S:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[S]], i32 [[X]] ; CHECK-NEXT: call void @use_abs(i32 [[D]]) -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 1 @@ -126,7 +126,7 @@ define i32 
@cttz_nabs_multiuse(i32 %x) { ; CHECK-NEXT: [[S:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[S]] ; CHECK-NEXT: call void @use_abs(i32 [[D]]) -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 1 @@ -144,7 +144,7 @@ define i32 @no_cttz_abs(i32 %x) { ; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X:%.*]], 2 ; CHECK-NEXT: [[S:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[S]], i32 [[X]] -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 2 @@ -159,7 +159,7 @@ define i32 @no_cttz_abs2(i32 %x) { ; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X:%.*]], 0 ; CHECK-NEXT: [[S:%.*]] = sub i32 1, [[X]] ; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[S]], i32 [[X]] -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp slt i32 %x, 0 @@ -175,7 +175,7 @@ define i32 @no_cttz_abs3(i32 %x) { ; CHECK-NEXT: call void @use_cond(i1 [[C]]) ; CHECK-NEXT: [[S:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[S]] -; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), !range !0 +; CHECK-NEXT: [[R:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[D]], i1 true), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %c = icmp sgt i32 %x, -2 @@ -216,9 +216,32 @@ define <2 x i64> @no_cttz_nabs_vec(<2 x i64> %x) { ret <2 x i64> %r } +define i32 @cttz_abs_intrin(i32 %x) { +; CHECK-LABEL: @cttz_abs_intrin( +; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[R:%.*]] = call i32 
@llvm.cttz.i32(i32 [[A]], i1 false), [[RNG0]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = call i32 @llvm.abs.i32(i32 %x, i1 false) + %r = call i32 @llvm.cttz.i32(i32 %a, i1 false) + ret i32 %r +} + +define i32 @cttz_nabs_intrin(i32 %x) { +; CHECK-LABEL: @cttz_nabs_intrin( +; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 false), [[RNG0]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = call i32 @llvm.abs.i32(i32 %x, i1 false) + %n = sub i32 0, %a + %r = call i32 @llvm.cttz.i32(i32 %n, i1 false) + ret i32 %r +} declare void @use_cond(i1) declare void @use_abs(i32) declare i32 @llvm.cttz.i32(i32, i1) declare i64 @llvm.cttz.i64(i64) declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>) +declare i32 @llvm.abs.i32(i32, i1) From 10cb23c6ca451374ebe2f31add236692e01637f3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 12:25:41 +0200 Subject: [PATCH 244/465] [InstCombine] Fold cttz of abs intrinsic Same as the existing fold for SPF_ABS. We don't need to explicitly handle the NABS variant, as we'll first fold away the neg in that case. 
--- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 3 +++ llvm/test/Transforms/InstCombine/cttz-abs.ll | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index dd4e48170f028..311a18c7f584d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -428,6 +428,9 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor; if (SPF == SPF_ABS || SPF == SPF_NABS) return IC.replaceOperand(II, 0, X); + + if (match(Op0, m_Intrinsic(m_Value(X)))) + return IC.replaceOperand(II, 0, X); } KnownBits Known = IC.computeKnownBits(Op0, 0, &II); diff --git a/llvm/test/Transforms/InstCombine/cttz-abs.ll b/llvm/test/Transforms/InstCombine/cttz-abs.ll index 8da3668997fa7..b89a55c8f5b87 100644 --- a/llvm/test/Transforms/InstCombine/cttz-abs.ll +++ b/llvm/test/Transforms/InstCombine/cttz-abs.ll @@ -218,8 +218,7 @@ define <2 x i64> @no_cttz_nabs_vec(<2 x i64> %x) { define i32 @cttz_abs_intrin(i32 %x) { ; CHECK-LABEL: @cttz_abs_intrin( -; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 false), [[RNG0]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %a = call i32 @llvm.abs.i32(i32 %x, i1 false) @@ -229,8 +228,7 @@ define i32 @cttz_abs_intrin(i32 %x) { define i32 @cttz_nabs_intrin(i32 %x) { ; CHECK-LABEL: @cttz_nabs_intrin( -; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 false), [[RNG0]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), [[RNG0]] ; CHECK-NEXT: ret i32 [[R]] ; %a = call i32 @llvm.abs.i32(i32 %x, i1 false) From 
3ab13348ba4c25f12254dd6c772f9fb1060b4425 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 12:36:27 +0200 Subject: [PATCH 245/465] [InstCombine] Add tests for mul of abs intrinsic (NFC) --- llvm/test/Transforms/InstCombine/mul.ll | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 2e9250fe94c41..8d00ad560f9c9 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s +declare i32 @llvm.abs.i32(i32, i1) + define i32 @pow2_multiplier(i32 %A) { ; CHECK-LABEL: @pow2_multiplier( ; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 1 @@ -858,6 +860,29 @@ define <4 x i32> @combine_mul_nabs_v4i32(<4 x i32> %0) { ret <4 x i32> %m } +define i32 @combine_mul_abs_intrin(i32 %x) { +; CHECK-LABEL: @combine_mul_abs_intrin( +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ABS]], [[ABS]] +; CHECK-NEXT: ret i32 [[MUL]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %mul = mul i32 %abs, %abs + ret i32 %mul +} + +define i32 @combine_mul_nabs_intrin(i32 %x) { +; CHECK-LABEL: @combine_mul_nabs_intrin( +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ABS]], [[ABS]] +; CHECK-NEXT: ret i32 [[MUL]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %neg = sub i32 0, %abs + %mul = mul i32 %neg, %neg + ret i32 %mul +} + ; z * splat(0) = splat(0), even for scalable vectors define @mul_scalable_splat_zero( %z) { ; CHECK-LABEL: @mul_scalable_splat_zero( From 58b28fa7a2fd57051f3d2911878776d6f57b18d8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 12:37:45 +0200 Subject: [PATCH 246/465] [InstCombine] Fold mul of abs intrinsic Same as the 
existing SPF_ABS fold. We don't need to explicitly handle NABS, as the negs will get folded away first. --- llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 3 +++ llvm/test/Transforms/InstCombine/mul.ll | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 99f19d9663b7b..245fd588a5231 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -275,6 +275,9 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor; if (SPF == SPF_ABS || SPF == SPF_NABS) return BinaryOperator::CreateMul(X, X); + + if (match(Op0, m_Intrinsic(m_Value(X)))) + return BinaryOperator::CreateMul(X, X); } // -X * C --> X * -C diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 8d00ad560f9c9..9959841f813ac 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -862,8 +862,7 @@ define <4 x i32> @combine_mul_nabs_v4i32(<4 x i32> %0) { define i32 @combine_mul_abs_intrin(i32 %x) { ; CHECK-LABEL: @combine_mul_abs_intrin( -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) -; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ABS]], [[ABS]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], [[X]] ; CHECK-NEXT: ret i32 [[MUL]] ; %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) @@ -873,8 +872,7 @@ define i32 @combine_mul_abs_intrin(i32 %x) { define i32 @combine_mul_nabs_intrin(i32 %x) { ; CHECK-LABEL: @combine_mul_nabs_intrin( -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) -; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ABS]], [[ABS]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], [[X]] ; CHECK-NEXT: ret i32 [[MUL]] ; %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) From 
8544defdcb09bbbbc25c5958e5f5b5762e9b9046 Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Sat, 5 Sep 2020 14:21:42 +0200 Subject: [PATCH 247/465] Thread safety analysis: Document how try-acquire is handled I don't think this is obvious, since try-acquire seemingly contradicts our usual requirements of "no conditional locking". Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D87065 --- clang/docs/ThreadSafetyAnalysis.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/clang/docs/ThreadSafetyAnalysis.rst b/clang/docs/ThreadSafetyAnalysis.rst index ea8e98a1884bf..b8d7d24275b92 100644 --- a/clang/docs/ThreadSafetyAnalysis.rst +++ b/clang/docs/ThreadSafetyAnalysis.rst @@ -414,6 +414,26 @@ The first argument must be ``true`` or ``false``, to specify which return value indicates success, and the remaining arguments are interpreted in the same way as ``ACQUIRE``. See :ref:`mutexheader`, below, for example uses. +Because the analysis doesn't support conditional locking, a capability is +treated as acquired after the first branch on the return value of a try-acquire +function. + +.. code-block:: c++ + + Mutex mu; + int a GUARDED_BY(mu); + + void foo() { + bool success = mu.TryLock(); + a = 0; // Warning, mu is not locked. + if (success) { + a = 0; // Ok. + mu.Unlock(); + } else { + a = 0; // Warning, mu is not locked. + } + } + ASSERT_CAPABILITY(...) and ASSERT_SHARED_CAPABILITY(...) -------------------------------------------------------- From 16975a638df3cda95c677055120b23e689d96dcd Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Sat, 5 Sep 2020 14:23:54 +0200 Subject: [PATCH 248/465] Set InvalidDecl directly when deserializing a Decl When parsing a C++17 binding declaration, we first create the BindingDecls in Sema::ActOnDecompositionDeclarator, and then build the DecompositionDecl in Sema::ActOnVariableDeclarator, so the contained BindingDecls are never null. 
But when deserializing, we read the DecompositionDecl with all properties before filling in the Bindings. Among other things, reading a declaration reads whether it's invalid, then calling setInvalidDecl which assumes that all bindings of the DecompositionDecl are available, but that isn't the case. Deserialization should just set all properties directly without invoking subsequent functions, so we just set the flag without using the setter. Fixes PR34960. Reviewed By: rsmith Differential Revision: https://reviews.llvm.org/D86207 --- clang/lib/Serialization/ASTReaderDecl.cpp | 2 +- clang/test/PCH/cxx1z-decomposition.cpp | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 47b378f5727b4..f5a66dc3c2d10 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -585,7 +585,7 @@ void ASTDeclReader::VisitDecl(Decl *D) { Reader.getContext()); } D->setLocation(ThisDeclLoc); - D->setInvalidDecl(Record.readInt()); + D->InvalidDecl = Record.readInt(); if (Record.readInt()) { // hasAttrs AttrVec Attrs; Record.readAttributes(Attrs); diff --git a/clang/test/PCH/cxx1z-decomposition.cpp b/clang/test/PCH/cxx1z-decomposition.cpp index 2f817b4280ded..914ce80c550d1 100644 --- a/clang/test/PCH/cxx1z-decomposition.cpp +++ b/clang/test/PCH/cxx1z-decomposition.cpp @@ -2,11 +2,11 @@ // RUN: %clang_cc1 -pedantic -std=c++1z -include %s -verify %s // // With PCH: -// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch %s -o %t -// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -verify %s +// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch -fallow-pch-with-compiler-errors %s -o %t +// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -fallow-pch-with-compiler-errors -verify %s -// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch -fpch-instantiate-templates %s -o %t -// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -verify 
%s +// RUN: %clang_cc1 -pedantic -std=c++1z -emit-pch -fallow-pch-with-compiler-errors -fpch-instantiate-templates %s -o %t +// RUN: %clang_cc1 -pedantic -std=c++1z -include-pch %t -fallow-pch-with-compiler-errors -verify %s #ifndef HEADER #define HEADER @@ -22,6 +22,8 @@ constexpr int foo(Q &&q) { return a * 10 + b; } +auto [noinit]; // expected-error{{decomposition declaration '[noinit]' requires an initializer}} + #else int arr[2]; From 430b47a17d2281bd566fc1aac19de80b99e6f0c6 Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Fri, 4 Sep 2020 17:08:17 +0530 Subject: [PATCH 249/465] [MLIR] Remove unused arg from affine tiling validity check Drop unused function arg from affine loop tiling validity check. --- mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp index 1889711cbf7a2..5bded917978a7 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp @@ -167,8 +167,7 @@ constructTiledIndexSetHyperRect(MutableArrayRef origLoops, /// function will return failure when any dependence component is negative along /// any of `origLoops`. static LogicalResult -checkTilingLegality(MutableArrayRef origLoops, - ArrayRef tileSizes) { +checkTilingLegality(MutableArrayRef origLoops) { assert(!origLoops.empty() && "no original loops provided"); // We first find out all dependences we intend to check. @@ -242,7 +241,7 @@ mlir::tilePerfectlyNested(MutableArrayRef input, auto origLoops = input; // Perform tiling legality test. 
- if (failed(checkTilingLegality(origLoops, tileSizes))) + if (failed(checkTilingLegality(origLoops))) origLoops[0].emitRemark("tiled code is illegal due to dependences"); AffineForOp rootAffineForOp = origLoops[0]; From 94c71d6aa1632170e27c60db4aa63c86e95bd450 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 15:05:59 +0200 Subject: [PATCH 250/465] [InstCombine] Add tests for abs intrinsic eq zero (NFC) --- llvm/test/Transforms/InstCombine/icmp.ll | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 3b223d71f9d48..b284c5a2c8406 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -3,6 +3,8 @@ target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +declare i8 @llvm.abs.i8(i8, i1) + define i32 @test1(i32 %X) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 [[X:%.*]], 31 @@ -1090,6 +1092,28 @@ define zeroext i1 @cmpabs2(i64 %val) { ret i1 %tobool } +define i1 @abs_intrin_eq_zero(i8 %x) { +; CHECK-LABEL: @abs_intrin_eq_zero( +; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ABS]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp = icmp eq i8 %abs, 0 + ret i1 %cmp +} + +define i1 @abs_intrin_ne_zero(i8 %x) { +; CHECK-LABEL: @abs_intrin_ne_zero( +; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[ABS]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp = icmp ne i8 %abs, 0 + ret i1 %cmp +} + define void @test58() { ; CHECK-LABEL: @test58( ; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 36029346783166592) From 
ada8a17d945c17c5603e24824f642ca199412adf Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 15:10:09 +0200 Subject: [PATCH 251/465] [InstCombine] Fold abs intrinsic eq zero Following the same transform for the select version of abs. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 7 +++++++ llvm/test/Transforms/InstCombine/icmp.ll | 6 ++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8860586c9aa19..350d00095c6f1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3088,6 +3088,13 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant( Type *Ty = II->getType(); unsigned BitWidth = C.getBitWidth(); switch (II->getIntrinsicID()) { + case Intrinsic::abs: + // abs(A) == 0 -> A == 0 + if (C.isNullValue()) + return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0), + Constant::getNullValue(Ty)); + break; + case Intrinsic::bswap: // bswap(A) == C -> A == bswap(C) return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0), diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index b284c5a2c8406..a9bda13e15b90 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -1094,8 +1094,7 @@ define zeroext i1 @cmpabs2(i64 %val) { define i1 @abs_intrin_eq_zero(i8 %x) { ; CHECK-LABEL: @abs_intrin_eq_zero( -; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ABS]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) @@ -1105,8 +1104,7 @@ define i1 @abs_intrin_eq_zero(i8 %x) { define i1 @abs_intrin_ne_zero(i8 %x) { ; CHECK-LABEL: @abs_intrin_ne_zero( -; CHECK-NEXT: [[ABS:%.*]] = call i8 
@llvm.abs.i8(i8 [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[ABS]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) From 73104b0751a1c1dd499550bf44e47d29882fbb32 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 4 Sep 2020 23:44:58 +0200 Subject: [PATCH 252/465] [InstSimplify] Fold min/max based on dominating condition If we have a dominating condition that x >= y, then umax(x, y) is x, etc. I'm doing this in InstSimplify as the corresponding transform for the select form is also done there. Differential Revision: https://reviews.llvm.org/D87168 --- llvm/lib/Analysis/InstructionSimplify.cpp | 7 ++++ .../InstSimplify/maxmin_intrinsics.ll | 42 +++++++------------ 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 843f0608a963b..3139b5a96b27d 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5326,6 +5326,13 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, if (isICmpTrue(Pred, Op1, Op0, Q.getWithoutUndef(), RecursionLimit)) return Op1; + if (Optional Imp = + isImpliedByDomCondition(Pred, Op0, Op1, Q.CxtI, Q.DL)) + return *Imp ? Op0 : Op1; + if (Optional Imp = + isImpliedByDomCondition(Pred, Op1, Op0, Q.CxtI, Q.DL)) + return *Imp ? 
Op1 : Op0; + break; } case Intrinsic::usub_with_overflow: diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index d1d711c1c1bd1..d646334887b23 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -2139,11 +2139,9 @@ define i8 @umax_dom_cond_uge(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M1]] +; CHECK-NEXT: ret i8 [[X]] ; CHECK: false: -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[Y]] ; %cmp = icmp uge i8 %x, %y br i1 %cmp, label %true, label %false @@ -2162,11 +2160,9 @@ define i8 @umax_dom_cond_ugt(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M1]] +; CHECK-NEXT: ret i8 [[X]] ; CHECK: false: -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[Y]] ; %cmp = icmp ugt i8 %x, %y br i1 %cmp, label %true, label %false @@ -2185,11 +2181,9 @@ define i8 @umax_dom_cond_ule(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M1]] +; CHECK-NEXT: ret i8 [[Y]] ; CHECK: false: -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X]] ; %cmp = icmp ule i8 %x, %y br i1 %cmp, label %true, label %false @@ 
-2208,11 +2202,9 @@ define i8 @umax_dom_cond_ult(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M1]] +; CHECK-NEXT: ret i8 [[Y]] ; CHECK: false: -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X]] ; %cmp = icmp ult i8 %x, %y br i1 %cmp, label %true, label %false @@ -2231,11 +2223,9 @@ define i8 @umin_dom_cond_uge(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M1]] +; CHECK-NEXT: ret i8 [[Y]] ; CHECK: false: -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X]] ; %cmp = icmp uge i8 %x, %y br i1 %cmp, label %true, label %false @@ -2254,11 +2244,9 @@ define i8 @smax_dom_cond_sge(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M1]] +; CHECK-NEXT: ret i8 [[X]] ; CHECK: false: -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[Y]] ; %cmp = icmp sge i8 %x, %y br i1 %cmp, label %true, label %false @@ -2277,11 +2265,9 @@ define i8 @smin_dom_cond_sge(i8 %x, i8 %y) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M1]] +; CHECK-NEXT: ret i8 [[Y]] ; CHECK: false: 
-; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 [[Y]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X]] ; %cmp = icmp sge i8 %x, %y br i1 %cmp, label %true, label %false From 4892d3a1983b0fae83e9476d8cec1d139da7eae0 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 11:09:06 +0200 Subject: [PATCH 253/465] [InstCombine] Fold abs with dominating condition Similar to D87168, but for abs. If we have a dominating x >= 0 condition, then we know that abs(x) is x. This fold is in InstCombine, because we need to create a sub instruction for the x < 0 case. Differential Revision: https://reviews.llvm.org/D87184 --- .../Transforms/InstCombine/InstCombineCalls.cpp | 15 +++++++++++++++ llvm/test/Transforms/InstCombine/abs-intrinsic.ll | 10 ++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 311a18c7f584d..40f6e9e147d76 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -779,6 +779,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return nullptr; case Intrinsic::abs: { Value *IIOperand = II->getArgOperand(0); + bool IntMinIsPoison = cast(II->getArgOperand(1))->isOneValue(); + // abs(-x) -> abs(x) // TODO: Copy nsw if it was present on the neg? 
Value *X; @@ -789,6 +791,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X)))) return replaceOperand(*II, 0, X); + if (Optional Imp = isImpliedByDomCondition( + ICmpInst::ICMP_SGE, IIOperand, + Constant::getNullValue(IIOperand->getType()), II, DL)) { + // abs(x) -> x if x >= 0 + if (*Imp) + return replaceInstUsesWith(*II, IIOperand); + + // abs(x) -> -x if x < 0 + if (IntMinIsPoison) + return BinaryOperator::CreateNSWNeg(IIOperand); + return BinaryOperator::CreateNeg(IIOperand); + } + break; } case Intrinsic::bswap: { diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index 7442ed7be1131..c39424aa75ba8 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -132,10 +132,9 @@ define i32 @abs_dom_cond_nopoison(i32 %x) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[A1:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) -; CHECK-NEXT: ret i32 [[A1]] +; CHECK-NEXT: ret i32 [[X]] ; CHECK: false: -; CHECK-NEXT: [[A2:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NEXT: [[A2:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: ret i32 [[A2]] ; %cmp = icmp sge i32 %x, 0 @@ -155,10 +154,9 @@ define i32 @abs_dom_cond_poison(i32 %x) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[A1:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 true) -; CHECK-NEXT: ret i32 [[A1]] +; CHECK-NEXT: ret i32 [[X]] ; CHECK: false: -; CHECK-NEXT: [[A2:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 true) +; CHECK-NEXT: [[A2:%.*]] = sub nsw i32 0, [[X]] ; CHECK-NEXT: ret i32 [[A2]] ; %cmp = icmp sge i32 %x, 0 From ac87480bd8beef0a4e93981e38df2c21652e1393 Mon Sep 17 00:00:00 
2001 From: Nikita Popov Date: Fri, 4 Sep 2020 22:06:52 +0200 Subject: [PATCH 254/465] [SCEV] Recognize min/max intrinsics Recognize umin/umax/smin/smax intrinsics and convert them to the already existing SCEV nodes of the same name. In the future we'll want SCEVExpander to also produce the intrinsics, but we're not ready for that yet. Differential Revision: https://reviews.llvm.org/D87160 --- llvm/lib/Analysis/ScalarEvolution.cpp | 19 +++++++++++++++++++ .../ScalarEvolution/minmax-intrinsics.ll | 12 ++++++------ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 9c9b9c53c939f..40d89fff04587 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6341,6 +6341,25 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::Invoke: if (Value *RV = cast(U)->getReturnedArgOperand()) return getSCEV(RV); + + if (auto *II = dyn_cast(U)) { + switch (II->getIntrinsicID()) { + case Intrinsic::umax: + return getUMaxExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + case Intrinsic::umin: + return getUMinExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + case Intrinsic::smax: + return getSMaxExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + case Intrinsic::smin: + return getSMinExpr(getSCEV(II->getArgOperand(0)), + getSCEV(II->getArgOperand(1))); + default: + break; + } + } break; } diff --git a/llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll b/llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll index c0395c328fca3..86ba0b7b658ef 100644 --- a/llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll +++ b/llvm/test/Analysis/ScalarEvolution/minmax-intrinsics.ll @@ -11,7 +11,7 @@ define i32 @umax(i32 %x, i32 %y) { ; CHECK-LABEL: 'umax' ; CHECK-NEXT: Classifying expressions for: @umax ; CHECK-NEXT: %z = call i32 @llvm.umax.i32(i32 %x, i32 %y) -; CHECK-NEXT: --> 
%z U: full-set S: full-set +; CHECK-NEXT: --> (%x umax %y) U: full-set S: full-set ; CHECK-NEXT: Determining loop execution counts for: @umax ; %z = call i32 @llvm.umax.i32(i32 %x, i32 %y) @@ -22,7 +22,7 @@ define i32 @umin(i32 %x, i32 %y) { ; CHECK-LABEL: 'umin' ; CHECK-NEXT: Classifying expressions for: @umin ; CHECK-NEXT: %z = call i32 @llvm.umin.i32(i32 %x, i32 %y) -; CHECK-NEXT: --> %z U: full-set S: full-set +; CHECK-NEXT: --> (%x umin %y) U: full-set S: full-set ; CHECK-NEXT: Determining loop execution counts for: @umin ; %z = call i32 @llvm.umin.i32(i32 %x, i32 %y) @@ -33,7 +33,7 @@ define i32 @smax(i32 %x, i32 %y) { ; CHECK-LABEL: 'smax' ; CHECK-NEXT: Classifying expressions for: @smax ; CHECK-NEXT: %z = call i32 @llvm.smax.i32(i32 %x, i32 %y) -; CHECK-NEXT: --> %z U: full-set S: full-set +; CHECK-NEXT: --> (%x smax %y) U: full-set S: full-set ; CHECK-NEXT: Determining loop execution counts for: @smax ; %z = call i32 @llvm.smax.i32(i32 %x, i32 %y) @@ -44,7 +44,7 @@ define i32 @smin(i32 %x, i32 %y) { ; CHECK-LABEL: 'smin' ; CHECK-NEXT: Classifying expressions for: @smin ; CHECK-NEXT: %z = call i32 @llvm.smin.i32(i32 %x, i32 %y) -; CHECK-NEXT: --> %z U: full-set S: full-set +; CHECK-NEXT: --> (%x smin %y) U: full-set S: full-set ; CHECK-NEXT: Determining loop execution counts for: @smin ; %z = call i32 @llvm.smin.i32(i32 %x, i32 %y) @@ -55,9 +55,9 @@ define i32 @clamp(i32 %x) { ; CHECK-LABEL: 'clamp' ; CHECK-NEXT: Classifying expressions for: @clamp ; CHECK-NEXT: %y = call i32 @llvm.umax.i32(i32 %x, i32 10) -; CHECK-NEXT: --> %y U: full-set S: full-set +; CHECK-NEXT: --> (10 umax %x) U: [10,0) S: [10,0) ; CHECK-NEXT: %z = call i32 @llvm.umin.i32(i32 %y, i32 20) -; CHECK-NEXT: --> %z U: full-set S: full-set +; CHECK-NEXT: --> (20 umin (10 umax %x)) U: [10,21) S: [10,21) ; CHECK-NEXT: Determining loop execution counts for: @clamp ; %y = call i32 @llvm.umax.i32(i32 %x, i32 10) From 9dcc82f34ea9b623d82d2577b93aaf67d36dabd2 Mon Sep 17 00:00:00 2001 From: Aaron 
Puchert Date: Sat, 5 Sep 2020 15:44:20 +0200 Subject: [PATCH 255/465] Thread safety analysis: Consider global variables in scope Instead of just mutex members we also consider mutex globals. Unsurprisingly they are always in scope. Now the paper [1] says that > The scope of a class member is assumed to be its enclosing class, > while the scope of a global variable is the translation unit in > which it is defined. But I don't think we should limit this to TUs where a definition is available - a declaration is enough to acquire the mutex, and if a mutex is really limited in scope to a translation unit, it should probably be only declared there. [1] https://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/42958.pdf Fixes PR46354. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D84604 --- clang/lib/Analysis/ThreadSafety.cpp | 13 +++++++-- .../SemaCXX/warn-thread-safety-analysis.cpp | 7 +++-- .../SemaCXX/warn-thread-safety-negative.cpp | 29 +++++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp index 64e0da9e64b12..1d4aabaaeb57e 100644 --- a/clang/lib/Analysis/ThreadSafety.cpp +++ b/clang/lib/Analysis/ThreadSafety.cpp @@ -1266,13 +1266,22 @@ ClassifyDiagnostic(const AttrTy *A) { } bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { - if (!CurrentMethod) + const threadSafety::til::SExpr *SExp = CapE.sexpr(); + assert(SExp && "Null expressions should be ignored"); + + // Global variables are always in scope. + if (isa(SExp)) + return true; + + // Members are in scope from methods of the same class. 
+ if (const auto *P = dyn_cast(SExp)) { + if (!CurrentMethod) return false; - if (const auto *P = dyn_cast_or_null(CapE.sexpr())) { const auto *VD = P->clangDecl(); if (VD) return VD->getDeclContext() == CurrentMethod->getDeclContext(); } + return false; } diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp index 91bd15def577d..d1520b1decbd3 100644 --- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp @@ -5036,7 +5036,8 @@ void spawn_fake_flight_control_thread(void) { } extern const char *deque_log_msg(void) __attribute__((requires_capability(Logger))); -void logger_entry(void) __attribute__((requires_capability(Logger))) { +void logger_entry(void) __attribute__((requires_capability(Logger))) + __attribute__((requires_capability(!FlightControl))) { const char *msg; while ((msg = deque_log_msg())) { @@ -5044,13 +5045,13 @@ void logger_entry(void) __attribute__((requires_capability(Logger))) { } } -void spawn_fake_logger_thread(void) { +void spawn_fake_logger_thread(void) __attribute__((requires_capability(!FlightControl))) { acquire(Logger); logger_entry(); release(Logger); } -int main(void) { +int main(void) __attribute__((requires_capability(!FlightControl))) { spawn_fake_flight_control_thread(); spawn_fake_logger_thread(); diff --git a/clang/test/SemaCXX/warn-thread-safety-negative.cpp b/clang/test/SemaCXX/warn-thread-safety-negative.cpp index 456fe16e6574e..68e30f4a3225b 100644 --- a/clang/test/SemaCXX/warn-thread-safety-negative.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-negative.cpp @@ -81,6 +81,35 @@ class Foo { } // end namespace SimpleTest +Mutex globalMutex; + +namespace ScopeTest { + +void f() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex); +void fq() EXCLUSIVE_LOCKS_REQUIRED(!::globalMutex); + +namespace ns { + Mutex globalMutex; + void f() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex); + void fq() EXCLUSIVE_LOCKS_REQUIRED(!ns::globalMutex); 
+} + +void testGlobals() EXCLUSIVE_LOCKS_REQUIRED(!ns::globalMutex) { + f(); // expected-warning {{calling function 'f' requires negative capability '!globalMutex'}} + fq(); // expected-warning {{calling function 'fq' requires negative capability '!globalMutex'}} + ns::f(); + ns::fq(); +} + +void testNamespaceGlobals() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex) { + f(); + fq(); + ns::f(); // expected-warning {{calling function 'f' requires negative capability '!globalMutex'}} + ns::fq(); // expected-warning {{calling function 'fq' requires negative capability '!globalMutex'}} +} + +} // end namespace ScopeTest + namespace DoubleAttribute { struct Foo { From b2ce79ef66157dd752e3864ece57915e23a73f5d Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Sun, 26 Jul 2020 01:53:32 +0200 Subject: [PATCH 256/465] Thread safety analysis: ValueDecl in Project is non-null The constructor asserts that, use it in the ThreadSafetyAnalyzer. Also note that the result of a cast<> cannot be null. --- clang/lib/Analysis/ThreadSafety.cpp | 5 ++--- clang/lib/Analysis/ThreadSafetyCommon.cpp | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp index 1d4aabaaeb57e..5b97265a6d8ae 100644 --- a/clang/lib/Analysis/ThreadSafety.cpp +++ b/clang/lib/Analysis/ThreadSafety.cpp @@ -1277,9 +1277,8 @@ bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { if (const auto *P = dyn_cast(SExp)) { if (!CurrentMethod) return false; - const auto *VD = P->clangDecl(); - if (VD) - return VD->getDeclContext() == CurrentMethod->getDeclContext(); + const ValueDecl *VD = P->clangDecl(); + return VD->getDeclContext() == CurrentMethod->getDeclContext(); } return false; diff --git a/clang/lib/Analysis/ThreadSafetyCommon.cpp b/clang/lib/Analysis/ThreadSafetyCommon.cpp index 1b8c55e56d470..aee9185760071 100644 --- a/clang/lib/Analysis/ThreadSafetyCommon.cpp +++ b/clang/lib/Analysis/ThreadSafetyCommon.cpp @@ -274,7 
+274,7 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, const auto *VD = cast(DRE->getDecl()->getCanonicalDecl()); // Function parameters require substitution and/or renaming. - if (const auto *PV = dyn_cast_or_null(VD)) { + if (const auto *PV = dyn_cast(VD)) { unsigned I = PV->getFunctionScopeIndex(); const DeclContext *D = PV->getDeclContext(); if (Ctx && Ctx->FunArgs) { From 5ad6552a836ef759ff8a96ffec333aacabb8dc36 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 17:14:13 +0200 Subject: [PATCH 257/465] [InstCombine] Add tests for known negative abs intrinsic (NFC) And duplicate tests for known non-negative from InstSimplify. --- .../Transforms/InstCombine/abs-intrinsic.ll | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index c39424aa75ba8..b00681d44d26c 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -3,6 +3,8 @@ declare i32 @llvm.abs.i32(i32, i1) declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) +declare <3 x i82> @llvm.abs.v3i82(<3 x i82>, i1) +declare void @llvm.assume(i1) ; abs preserves trailing zeros so the second and is unneeded define i32 @abs_trailing_zeros(i32 %x) { @@ -170,3 +172,85 @@ false: %a2 = call i32 @llvm.abs.i32(i32 %x, i1 true) ret i32 %a2 } + +; Abs argument non-neg based on known bits. 
+ +define i32 @zext_abs(i31 %x) { +; CHECK-LABEL: @zext_abs( +; CHECK-NEXT: [[ZEXT:%.*]] = zext i31 [[X:%.*]] to i32 +; CHECK-NEXT: ret i32 [[ZEXT]] +; + %zext = zext i31 %x to i32 + %abs = call i32 @llvm.abs.i32(i32 %zext, i1 false) + ret i32 %abs +} + +define <3 x i82> @lshr_abs(<3 x i82> %x) { +; CHECK-LABEL: @lshr_abs( +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], +; CHECK-NEXT: ret <3 x i82> [[LSHR]] +; + %lshr = lshr <3 x i82> %x, + %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %lshr, i1 true) + ret <3 x i82> %abs +} + +define i32 @and_abs(i32 %x) { +; CHECK-LABEL: @and_abs( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483644 +; CHECK-NEXT: ret i32 [[AND]] +; + %and = and i32 %x, 2147483644 + %abs = call i32 @llvm.abs.i32(i32 %and, i1 true) + ret i32 %abs +} + +define <3 x i82> @select_abs(<3 x i1> %cond) { +; CHECK-LABEL: @select_abs( +; CHECK-NEXT: [[SEL:%.*]] = select <3 x i1> [[COND:%.*]], <3 x i82> zeroinitializer, <3 x i82> +; CHECK-NEXT: ret <3 x i82> [[SEL]] +; + %sel = select <3 x i1> %cond, <3 x i82> zeroinitializer, <3 x i82> + %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %sel, i1 false) + ret <3 x i82> %abs +} + +define i32 @assume_abs(i32 %x) { +; CHECK-LABEL: @assume_abs( +; CHECK-NEXT: [[ASSUME:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[ASSUME]]) +; CHECK-NEXT: ret i32 [[X]] +; + %assume = icmp sge i32 %x, 0 + call void @llvm.assume(i1 %assume) + %abs = call i32 @llvm.abs.i32(i32 %x, i1 true) + ret i32 %abs +} + +; Abs argument negative based on known bits. 
+ +define i32 @abs_assume_neg(i32 %x) { +; CHECK-LABEL: @abs_assume_neg( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NEXT: ret i32 [[ABS]] +; + %cmp = icmp slt i32 %x, 0 + call void @llvm.assume(i1 %cmp) + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + ret i32 %abs +} + +define i32 @abs_known_neg(i16 %x) { +; CHECK-LABEL: @abs_known_neg( +; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[X:%.*]] to i32 +; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[EXT]], -1 +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[NEG]], i1 false) +; CHECK-NEXT: ret i32 [[ABS]] +; + %ext = zext i16 %x to i32 + %neg = sub nsw i32 -1, %ext + %abs = call i32 @llvm.abs.i32(i32 %neg, i1 false) + ret i32 %abs +} From d3a779fe21500457e95c8c4e963638b93e3bcc71 Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Sat, 5 Sep 2020 17:52:57 +0200 Subject: [PATCH 258/465] Restore size of TemplateParameterList after D44352 After adding a field of one bit, the bitfield members would take 30+1+1+1 = 33 bits, causing the size of TemplateParameterList to increase from 16 to 24 bytes on 64-bit systems. With 29 bits for NumParams we can encode up to half a billion template parameters, which is almost certainly still enough for anybody. --- clang/include/clang/AST/DeclTemplate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 4feb1d45251d5..9e22543761501 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -77,7 +77,7 @@ class TemplateParameterList final /// The number of template parameters in this template /// parameter list. - unsigned NumParams : 30; + unsigned NumParams : 29; /// Whether this template parameter list contains an unexpanded parameter /// pack. 
From da6b3aa4c6bb29a150628ad489274466c6b8ace0 Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Sat, 5 Sep 2020 18:25:27 +0200 Subject: [PATCH 259/465] Attempt to fix Sphinx build failure, NFC A code block wasn't properly introduced. --- .../clang-tidy/checks/bugprone-redundant-branch-condition.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-redundant-branch-condition.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-redundant-branch-condition.rst index 8bc97f4114ae5..c2746914e754a 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-redundant-branch-condition.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-redundant-branch-condition.rst @@ -83,6 +83,8 @@ Known limitations The ``else`` branch is not checked currently for negated condition variable: +.. code-block:: c + bool onFire = isBurning(); if (onFire) { scream(); From bef38e86b4e702a0c42e243d323fb7c09875e649 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Thu, 20 Aug 2020 18:10:18 +0100 Subject: [PATCH 260/465] [ELF] Handle SHT_RISCV_ATTRIBUTES similarly to SHT_ARM_ATTRIBUTES Currently we treat SHT_RISCV_ATTRIBUTES like a normal section and concatenate all such input sections, yielding invalid output unless only a single attributes section is present in the input. Instead, pick the first as with SHT_ARM_ATTRIBUTES. We do not currently need to condition our behaviour on the contents, unlike Arm. In future, we should both do stricter validation of the input and merge all sections together to ensure we have, for example, the full arch string requirement, but this rudimentary implementation is good enough for most common cases. 
Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D86309 --- lld/ELF/InputFiles.cpp | 53 +++++++++++++++++++++++---------- lld/ELF/SyntheticSections.h | 2 +- lld/test/ELF/riscv-attributes.s | 33 ++++++++++++++++++++ 3 files changed, 72 insertions(+), 16 deletions(-) create mode 100644 lld/test/ELF/riscv-attributes.s diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index bfc8e9c1e53b1..acdb5c71efb96 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" +#include "llvm/Support/RISCVAttributeParser.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/raw_ostream.h" @@ -867,10 +868,7 @@ template InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { StringRef name = getSectionName(sec); - switch (sec.sh_type) { - case SHT_ARM_ATTRIBUTES: { - if (config->emachine != EM_ARM) - break; + if (config->emachine == EM_ARM && sec.sh_type == SHT_ARM_ATTRIBUTES) { ARMAttributeParser attributes; ArrayRef contents = check(this->getObj().getSectionContents(&sec)); if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind @@ -878,20 +876,45 @@ InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { : support::big)) { auto *isec = make(*this, sec, name); warn(toString(isec) + ": " + llvm::toString(std::move(e))); - break; + } else { + updateSupportedARMFeatures(attributes); + updateARMVFPArgs(attributes, this); + + // FIXME: Retain the first attribute section we see. The eglibc ARM + // dynamic loaders require the presence of an attribute section for dlopen + // to work. In a full implementation we would merge all attribute + // sections. 
+ if (in.attributes == nullptr) { + in.attributes = make(*this, sec, name); + return in.attributes; + } + return &InputSection::discarded; } - updateSupportedARMFeatures(attributes); - updateARMVFPArgs(attributes, this); - - // FIXME: Retain the first attribute section we see. The eglibc ARM - // dynamic loaders require the presence of an attribute section for dlopen - // to work. In a full implementation we would merge all attribute sections. - if (in.armAttributes == nullptr) { - in.armAttributes = make(*this, sec, name); - return in.armAttributes; + } + + if (config->emachine == EM_RISCV && sec.sh_type == SHT_RISCV_ATTRIBUTES) { + RISCVAttributeParser attributes; + ArrayRef contents = check(this->getObj().getSectionContents(&sec)); + if (Error e = attributes.parse(contents, support::little)) { + auto *isec = make(*this, sec, name); + warn(toString(isec) + ": " + llvm::toString(std::move(e))); + } else { + // FIXME: Validate arch tag contains C if and only if EF_RISCV_RVC is + // present. + + // FIXME: Retain the first attribute section we see. Tools such as + // llvm-objdump make use of the attribute section to determine which + // standard extensions to enable. In a full implementation we would merge + // all attribute sections. + if (in.attributes == nullptr) { + in.attributes = make(*this, sec, name); + return in.attributes; + } + return &InputSection::discarded; } - return &InputSection::discarded; } + + switch (sec.sh_type) { case SHT_LLVM_DEPENDENT_LIBRARIES: { if (config->relocatable) break; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 7779efcd5fe5a..8943596179c17 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -1200,7 +1200,7 @@ inline Partition &SectionBase::getPartition() const { // Linker generated sections which can be used as inputs and are not specific to // a partition. 
struct InStruct { - InputSection *armAttributes; + InputSection *attributes; BssSection *bss; BssSection *bssRelRo; GotSection *got; diff --git a/lld/test/ELF/riscv-attributes.s b/lld/test/ELF/riscv-attributes.s new file mode 100644 index 0000000000000..36e506a1df662 --- /dev/null +++ b/lld/test/ELF/riscv-attributes.s @@ -0,0 +1,33 @@ +# REQUIRES: riscv + +# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=-relax %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf --arch-specific %t | FileCheck %s +# RUN: ld.lld %t.o %t.o -o %t2 +# RUN: llvm-readelf --arch-specific %t2 | FileCheck %s + +# CHECK: BuildAttributes { +# CHECK-NEXT: FormatVersion: 0x41 +# CHECK-NEXT: Section 1 { +# CHECK-NEXT: SectionLength: 52 +# CHECK-NEXT: Vendor: riscv +# CHECK-NEXT: Tag: Tag_File (0x1) +# CHECK-NEXT: Size: 42 +# CHECK-NEXT: FileAttributes { +# CHECK-NEXT: Attribute { +# CHECK-NEXT: Tag: 4 +# CHECK-NEXT: Value: 16 +# CHECK-NEXT: TagName: stack_align +# CHECK-NEXT: Description: Stack alignment is 16-bytes +# CHECK-NEXT: } +# CHECK-NEXT: Attribute { +# CHECK-NEXT: Tag: 5 +# CHECK-NEXT: TagName: arch +# CHECK-NEXT: Value: rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0 +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT: } + +.attribute 4, 16 +.attribute 5, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0" From 0c642828612dbde30decff6fb080af4de9a173bd Mon Sep 17 00:00:00 2001 From: Lawrence D'Anna Date: Sat, 5 Sep 2020 11:09:21 -0700 Subject: [PATCH 261/465] scan-build-py: fix multiprocessing error Recent versions of python3's multiprocessing module will blow up with a Runtime error from this code, saying: An attempt has been made to start a new process before the current process has finished its bootstrapping phase This is becuae the wrappers in bin/ are not using the `__name__ == "__main__"` idiom correctly. 
Reviewed By: ldionne Differential Revision: https://reviews.llvm.org/D87051 --- clang/tools/scan-build-py/bin/analyze-build | 7 ++++--- clang/tools/scan-build-py/bin/intercept-build | 7 ++++--- clang/tools/scan-build-py/bin/scan-build | 7 ++++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/clang/tools/scan-build-py/bin/analyze-build b/clang/tools/scan-build-py/bin/analyze-build index 6c285874a2083..0884ef2234bf4 100755 --- a/clang/tools/scan-build-py/bin/analyze-build +++ b/clang/tools/scan-build-py/bin/analyze-build @@ -5,12 +5,13 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import multiprocessing -multiprocessing.freeze_support() - import sys import os.path this_dir = os.path.dirname(os.path.realpath(__file__)) sys.path.append(os.path.dirname(this_dir)) from libscanbuild.analyze import analyze_build -sys.exit(analyze_build()) + +if __name__ == '__main__': + multiprocessing.freeze_support() + sys.exit(analyze_build()) diff --git a/clang/tools/scan-build-py/bin/intercept-build b/clang/tools/scan-build-py/bin/intercept-build index 23f5104782ca7..d9757b77b5c73 100755 --- a/clang/tools/scan-build-py/bin/intercept-build +++ b/clang/tools/scan-build-py/bin/intercept-build @@ -5,12 +5,13 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import multiprocessing -multiprocessing.freeze_support() - import sys import os.path this_dir = os.path.dirname(os.path.realpath(__file__)) sys.path.append(os.path.dirname(this_dir)) from libscanbuild.intercept import intercept_build -sys.exit(intercept_build()) + +if __name__ == '__main__': + multiprocessing.freeze_support() + sys.exit(intercept_build()) diff --git a/clang/tools/scan-build-py/bin/scan-build b/clang/tools/scan-build-py/bin/scan-build index 156da064a2b47..be4e51887e30b 100755 --- a/clang/tools/scan-build-py/bin/scan-build +++ b/clang/tools/scan-build-py/bin/scan-build @@ -5,12 +5,13 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import multiprocessing 
-multiprocessing.freeze_support() - import sys import os.path this_dir = os.path.dirname(os.path.realpath(__file__)) sys.path.append(os.path.dirname(this_dir)) from libscanbuild.analyze import scan_build -sys.exit(scan_build()) + +if __name__ == '__main__': + multiprocessing.freeze_support() + sys.exit(scan_build()) From 1ddb3a369f7ebdf738486cd60261c3143658c0e6 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Sep 2020 20:43:28 +0100 Subject: [PATCH 262/465] [LangRef] Adjust guarantee for llvm.memcpy to also allow equal arguments. This adjusts the description of `llvm.memcpy` to also allow operands to be equal. This is in line with what Clang currently expects. This change is intended to be temporary and followed by re-introduce a variant with the non-overlapping guarantee for cases where we can actually ensure that property in the front-end. See the links below for more details: http://lists.llvm.org/pipermail/cfe-dev/2020-August/066614.html and PR11763. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D86815 --- llvm/docs/LangRef.rst | 10 ++-- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 22 +++------ llvm/test/Analysis/BasicAA/assume.ll | 4 +- llvm/test/Analysis/BasicAA/cs-cs.ll | 48 +++++++++---------- llvm/test/Analysis/BasicAA/guards.ll | 4 +- .../test/Analysis/MemorySSA/basicaa-memcpy.ll | 2 +- .../MSSA/memset-and-memcpy.ll | 12 ++--- .../DeadStoreElimination/MSSA/simple.ll | 12 +++-- 8 files changed, 55 insertions(+), 59 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2bd7d09f698db..781b2385de500 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12479,11 +12479,11 @@ very cleanly specified and it is unwise to depend on it. Semantics: """""""""" -The '``llvm.memcpy.*``' intrinsics copy a block of memory from the -source location to the destination location, which are not allowed to -overlap. It copies "len" bytes of memory over. 
If the argument is known -to be aligned to some boundary, this can be specified as an attribute on -the argument. +The '``llvm.memcpy.*``' intrinsics copy a block of memory from the source +location to the destination location, which must either be equal or +non-overlapping. It copies "len" bytes of memory over. If the argument is known +to be aligned to some boundary, this can be specified as an attribute on the +argument. If "len" is 0, the pointers may be NULL, dangling, ``undef``, or ``poison`` pointers. However, they must still be appropriately aligned. diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 2ede4baaf6832..ca043b415b107 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -975,22 +975,14 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call, return ModRefInfo::NoModRef; } - // The semantics of memcpy intrinsics forbid overlap between their respective - // operands, i.e., source and destination of any given memcpy must no-alias. - // If Loc must-aliases either one of these two locations, then it necessarily - // no-aliases the other. + // The semantics of memcpy intrinsics either exactly overlap or do not + // overlap, i.e., source and destination of any given memcpy are either + // no-alias or must-alias. if (auto *Inst = dyn_cast(Call)) { - AliasResult SrcAA, DestAA; - - if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst), - Loc, AAQI)) == MustAlias) - // Loc is exactly the memcpy source thus disjoint from memcpy dest. - return ModRefInfo::Ref; - if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst), - Loc, AAQI)) == MustAlias) - // The converse case. 
- return ModRefInfo::Mod; - + AliasResult SrcAA = + getBestAAResults().alias(MemoryLocation::getForSource(Inst), Loc, AAQI); + AliasResult DestAA = + getBestAAResults().alias(MemoryLocation::getForDest(Inst), Loc, AAQI); // It's also possible for Loc to alias both src and dest, or neither. ModRefInfo rv = ModRefInfo::NoModRef; if (SrcAA != NoAlias) diff --git a/llvm/test/Analysis/BasicAA/assume.ll b/llvm/test/Analysis/BasicAA/assume.ll index 1a7de5aa6afef..fe83a8c3df0e3 100644 --- a/llvm/test/Analysis/BasicAA/assume.ll +++ b/llvm/test/Analysis/BasicAA/assume.ll @@ -14,8 +14,8 @@ define void @test1(i8* %P, i8* %Q) nounwind ssp { ; CHECK: MayAlias: i8* %P, i8* %Q ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.assume(i1 true) ; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.assume(i1 true) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.assume(i1 true) } diff --git a/llvm/test/Analysis/BasicAA/cs-cs.ll b/llvm/test/Analysis/BasicAA/cs-cs.ll index fa7a5f972aafe..49eedd4279374 100644 --- a/llvm/test/Analysis/BasicAA/cs-cs.ll +++ b/llvm/test/Analysis/BasicAA/cs-cs.ll @@ -17,12 +17,12 @@ define void @test2(i8* %P, i8* %Q) #3 { ; CHECK-LABEL: Function: test2: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; 
CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2_atomic(i8* %P, i8* %Q) #3 { @@ -33,12 +33,12 @@ define void @test2_atomic(i8* %P, i8* %Q) #3 { ; CHECK-LABEL: Function: test2_atomic: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) -; 
CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) -; CHECK: Just Mod: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) -; CHECK: Just Mod: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) +; CHECK: Both ModRef: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) <-> tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) } define void @test2a(i8* noalias %P, i8* noalias %Q) #3 { @@ -149,12 
+149,12 @@ define void @test3(i8* %P, i8* %Q) #3 { ; CHECK-LABEL: Function: test3: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) } define void @test3a(i8* noalias %P, i8* noalias %Q) #3 { @@ -199,14 +199,14 @@ define void @test5(i8* %P, i8* %Q, i8* %R) #3 { ; CHECK: MayAlias: i8* %P, i8* %Q ; CHECK: MayAlias: i8* %P, i8* %R ; CHECK: MayAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail 
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) ; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test5a(i8* noalias %P, i8* noalias %Q, i8* noalias %R) nounwind ssp { diff --git a/llvm/test/Analysis/BasicAA/guards.ll b/llvm/test/Analysis/BasicAA/guards.ll index 
e6cee5f6d1e44..66293503ed4ac 100644 --- a/llvm/test/Analysis/BasicAA/guards.ll +++ b/llvm/test/Analysis/BasicAA/guards.ll @@ -14,8 +14,8 @@ define void @test1(i8* %P, i8* %Q) { ; CHECK: Just Ref: Ptr: i8* %P <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ; CHECK: Just Ref: tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void (i1, ...) 
@llvm.experimental.guard(i1 true) [ "deopt"() ] } diff --git a/llvm/test/Analysis/MemorySSA/basicaa-memcpy.ll b/llvm/test/Analysis/MemorySSA/basicaa-memcpy.ll index 83a5544768475..678b652c61103 100644 --- a/llvm/test/Analysis/MemorySSA/basicaa-memcpy.ll +++ b/llvm/test/Analysis/MemorySSA/basicaa-memcpy.ll @@ -6,7 +6,7 @@ define void @source_clobber(i8* %a, i8* %b) { ; CHECK-LABEL: @source_clobber( ; CHECK-NEXT: ; 1 = MemoryDef(liveOnEntry) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 false) -; CHECK-NEXT: ; MemoryUse(liveOnEntry) +; CHECK-NEXT: ; MemoryUse(1) MayAlias ; CHECK-NEXT: [[X:%.*]] = load i8, i8* %b ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll index d54fe130d05f6..5aeda18309724 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll @@ -68,13 +68,12 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp { ret void } -; According to the current LangRef, memcpy's source and destination cannot -; overlap, hence the first memcpy is dead. -; -; Previously this was not allowed (PR8728), also discussed in PR11763. +; See PR11763 - LLVM allows memcpy's source and destination to be equal (but not +; inequal and overlapping). 
define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK-LABEL: @test18( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) @@ -84,7 +83,8 @@ define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK-LABEL: @test18_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) ; CHECK-NEXT: ret void ; tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll index 1ac6ad7d81586..9f719746f9f17 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -554,10 +554,12 @@ define void @test37_atomic(i8* %P, i8* %Q, i8* %R) { ret void } -; The memmove is dead, because memcpy arguments cannot overlap. +; See PR11763 - LLVM allows memcpy's source and destination to be equal (but not +; inequal and overlapping). 
define void @test38(i8* %P, i8* %Q, i8* %R) { ; CHECK-LABEL: @test38( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; @@ -566,10 +568,12 @@ define void @test38(i8* %P, i8* %Q, i8* %R) { ret void } -; The memmove is dead, because memcpy arguments cannot overlap. +; See PR11763 - LLVM allows memcpy's source and destination to be equal (but not +; inequal and overlapping). define void @test38_atomic(i8* %P, i8* %Q, i8* %R) { ; CHECK-LABEL: @test38_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) ; CHECK-NEXT: ret void ; From 621b10ca187bdd6de691338e48b288ea1c6a5822 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 21:40:53 +0200 Subject: [PATCH 263/465] [InstSimplify] Add tests for a peculiar abs of abs form (NFC) This pattern shows up when canonicalizing to spf abs form to intrinsic abs form. 
--- .../Transforms/InstSimplify/abs_intrinsic.ll | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll index aa299c1084b7e..e9305a927c42c 100644 --- a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll +++ b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll @@ -201,3 +201,63 @@ define i1 @abs_ule_int_min(i8 %x) { %c = icmp ule i8 %abs, 128 ret i1 %c } + +define i32 @select_abs_of_abs_eq(i32 %x) { +; CHECK-LABEL: @select_abs_of_abs_eq( +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[ABS]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[NEG]], i32 [[ABS]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %neg = sub i32 0, %abs + %cmp = icmp eq i32 %x, 0 + %sel = select i1 %cmp, i32 %neg, i32 %abs + ret i32 %sel +} + +define i32 @select_abs_of_abs_ne(i32 %x) { +; CHECK-LABEL: @select_abs_of_abs_ne( +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[ABS]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[ABS]], i32 [[NEG]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %neg = sub i32 0, %abs + %cmp = icmp ne i32 %x, 0 + %sel = select i1 %cmp, i32 %abs, i32 %neg + ret i32 %sel +} + +define i32 @select_nabs_of_abs_eq(i32 %x) { +; CHECK-LABEL: @select_nabs_of_abs_eq( +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[ABS]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[ABS]], i32 [[NEG]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %neg = sub i32 0, %abs + %cmp = icmp eq 
i32 %x, 0 + %sel = select i1 %cmp, i32 %abs, i32 %neg + ret i32 %sel +} + +define i32 @select_nabs_of_abs_ne(i32 %x) { +; CHECK-LABEL: @select_nabs_of_abs_ne( +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[ABS]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[NEG]], i32 [[ABS]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) + %neg = sub i32 0, %abs + %cmp = icmp ne i32 %x, 0 + %sel = select i1 %cmp, i32 %neg, i32 %abs + ret i32 %sel +} From 1387f96ab3310678df62c1073346ca387a85f656 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 4 Sep 2020 19:33:14 -0500 Subject: [PATCH 264/465] [Hexagon] Handle widening of vector truncate --- .../Target/Hexagon/HexagonISelLowering.cpp | 9 +-- llvm/lib/Target/Hexagon/HexagonISelLowering.h | 8 ++- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 65 +++++++++++++++---- llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 11 +++- .../CodeGen/Hexagon/autohvx/isel-truncate.ll | 4 +- ...ort-store-widen.ll => isel-widen-store.ll} | 0 .../Hexagon/autohvx/isel-widen-truncate.ll | 24 +++++++ 7 files changed, 99 insertions(+), 22 deletions(-) rename llvm/test/CodeGen/Hexagon/autohvx/{short-store-widen.ll => isel-widen-store.ll} (100%) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 7b1c8b6079a0e..645d28de2b20d 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1862,6 +1862,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST"; case HexagonISD::VALIGN: return "HexagonISD::VALIGN"; case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR"; + case HexagonISD::VPACKL: return 
"HexagonISD::VPACKL"; case HexagonISD::OP_END: break; } return nullptr; @@ -3014,7 +3015,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR) return LowerINLINEASM(Op, DAG); - if (isHvxOperation(Op.getNode())) { + if (isHvxOperation(Op.getNode(), DAG)) { // If HVX lowering returns nothing, try the default lowering. if (SDValue V = LowerHvxOperation(Op, DAG)) return V; @@ -3075,7 +3076,7 @@ void HexagonTargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { - if (isHvxOperation(N)) { + if (isHvxOperation(N, DAG)) { LowerHvxOperationWrapper(N, Results, DAG); if (!Results.empty()) return; @@ -3094,7 +3095,7 @@ void HexagonTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { - if (isHvxOperation(N)) { + if (isHvxOperation(N, DAG)) { ReplaceHvxNodeResults(N, Results, DAG); if (!Results.empty()) return; @@ -3123,7 +3124,7 @@ HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.isBeforeLegalizeOps()) return SDValue(); - if (isHvxOperation(N)) { + if (isHvxOperation(N, DCI.DAG)) { if (SDValue V = PerformHvxDAGCombine(N, DCI)) return V; return SDValue(); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index c12c3f6cd8ee5..8473515b3c758 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -88,6 +88,11 @@ enum NodeType : unsigned { // been loaded from address in Op2. VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is // an address in a vector load, then it's a no-op. + VPACKL, // Pack low parts of the input vector to the front of the + // output. For example v64i16 VPACKL(v32i32) will pick + // the low halfwords and pack them into the first 32 + // halfwords of the output. The rest of the output is + // unspecified. 
OP_END }; @@ -476,12 +481,13 @@ class HexagonTargetLowering : public TargetLowering { SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const; SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const; SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const; + SDValue WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const; std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override; - bool isHvxOperation(SDNode *N) const; + bool isHvxOperation(SDNode *N, SelectionDAG &DAG) const; SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index ed701728892ad..0f365a313233d 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -233,8 +233,10 @@ HexagonTargetLowering::initializeHVXLowering() { for (int N = 2; N < MaxElems; N *= 2) { MVT VecTy = MVT::getVectorVT(ElemTy, N); auto Action = getPreferredVectorAction(VecTy); - if (Action == TargetLoweringBase::TypeWidenVector) + if (Action == TargetLoweringBase::TypeWidenVector) { setOperationAction(ISD::STORE, VecTy, Custom); + setOperationAction(ISD::TRUNCATE, VecTy, Custom); + } } } @@ -1912,6 +1914,21 @@ HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { MOp, ISD::UNINDEXED, false, false); } +SDValue +HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const { + const SDLoc &dl(Op); + MVT ResTy = ty(Op); + unsigned HwWidth = 8*Subtarget.getVectorLength(); + unsigned ResWidth = ResTy.getSizeInBits(); + assert(HwWidth % ResWidth == 0); + + unsigned WideNumElem = ResTy.getVectorNumElements() * (HwWidth / ResWidth); + MVT WideTy = MVT::getVectorVT(ResTy.getVectorElementType(), WideNumElem); + SDValue WideOp = DAG.getNode(HexagonISD::VPACKL, dl, WideTy, + 
Op.getOperand(0)); + return WideOp; +} + SDValue HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); @@ -2020,7 +2037,14 @@ void HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { unsigned Opc = N->getOpcode(); + SDValue Op(N, 0); switch (Opc) { + case ISD::TRUNCATE: + if (!Subtarget.isHVXVectorType(ty(Op), false)) { + SDValue T = WidenHvxTruncate(Op, DAG); + Results.push_back(T); + } + break; case ISD::BITCAST: if (isHvxBoolTy(ty(N->getOperand(0)))) { SDValue Op(N, 0); @@ -2058,25 +2082,38 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) } bool -HexagonTargetLowering::isHvxOperation(SDNode *N) const { - if (N->getOpcode() == ISD::STORE) { - // If it's a store-to-be-widened, treat it as an HVX operation. - SDValue Val = cast(N)->getValue(); - MVT ValTy = ty(Val); - if (ValTy.isVector()) { - auto Action = getPreferredVectorAction(ValTy); - if (Action == TargetLoweringBase::TypeWidenVector) - return true; - } - } +HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { // If the type of any result, or any operand type are HVX vector types, // this is an HVX operation. - auto IsHvxTy = [this] (EVT Ty) { + auto IsHvxTy = [this](EVT Ty) { return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true); }; auto IsHvxOp = [this](SDValue Op) { return Op.getValueType().isSimple() && Subtarget.isHVXVectorType(ty(Op), true); }; - return llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp); + if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp)) + return true; + + // Check if this could be an HVX operation after type widening. 
+ auto IsWidenedToHvx = [this, &DAG](SDValue Op) { + if (!Op.getValueType().isSimple()) + return false; + MVT ValTy = ty(Op); + if (ValTy.isVector()) { + auto Action = getPreferredVectorAction(ValTy); + if (Action == TargetLoweringBase::TypeWidenVector) { + EVT WideTy = getTypeToTransformTo(*DAG.getContext(), ValTy); + assert(WideTy.isSimple()); + return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true); + } + } + return false; + }; + + for (int i = 0, e = N->getNumValues(); i != e; ++i) { + if (IsWidenedToHvx(SDValue(N, i))) + return true; + } + return llvm::any_of(N->ops(), IsWidenedToHvx); } diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 0e5772bd690f2..b656a845b1526 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -1,3 +1,6 @@ +def SDTVecUnaryOp: + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; + def SDTVecBinOp: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>; @@ -37,6 +40,7 @@ def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>; def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>; def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>; def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>; +def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>; def vzero: PatFrag<(ops), (HexagonVZERO)>; def qtrue: PatFrag<(ops), (HexagonQTRUE)>; @@ -44,7 +48,8 @@ def qfalse: PatFrag<(ops), (HexagonQFALSE)>; def qcat: PatFrag<(ops node:$Qs, node:$Qt), (HexagonQCAT node:$Qs, node:$Qt)>; -def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>; +def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>; +def vpackl: PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>; def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>; def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>; @@ -401,6 +406,10 @@ let Predicates = [UseHVX] in { def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, 
HvxVR:$Vt)>; def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(VecI8 (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>; + def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w HvxVR:$Vs, (IMPLICIT_DEF))>; + def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>; + def: Pat<(VecI16 (bswap HVI16:$Vs)), (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>; def: Pat<(VecI32 (bswap HVI32:$Vs)), diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate.ll index cc10c378f27b6..36157c66887cc 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s ; Check that this compiles successfully. -; CHECK: vpacke +; CHECK: vdeal target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" target triple = "hexagon" @@ -17,4 +17,4 @@ b0: ret void } -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv65" "target-features"="+hvx-length64b,+hvxv65" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/short-store-widen.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-store.ll similarity index 100% rename from llvm/test/CodeGen/Hexagon/autohvx/short-store-widen.ll rename to llvm/test/CodeGen/Hexagon/autohvx/isel-widen-store.ll diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate.ll new file mode 100644 index 0000000000000..6d5018757c7a6 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s + +; Check for successful compilation. 
+; Expect that the truncate to v32i8 is lowered to vdeale. + +; CHECK-LABEL: f0: +; CHECK: vdeale + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dllexport void @f0(<32 x i32> %a0) local_unnamed_addr #0 { +b0: + %v0 = trunc <32 x i32> %a0 to <32 x i8> + %v1 = shufflevector <32 x i8> %v0, <32 x i8> undef, <128 x i32> + tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v1, <128 x i8>* undef, i32 128, <128 x i1> ) + ret void +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32 immarg, <128 x i1>) #1 + +attributes #0 = { "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" } +attributes #1 = { argmemonly nounwind willreturn } From 298c9fae9370598a37896c002da6250c3b1a2313 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 4 Sep 2020 23:30:16 -0700 Subject: [PATCH 265/465] [NFC][compiler-rt] Refine .clang-tidy checks Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D87182 --- compiler-rt/.clang-tidy | 4 ++-- compiler-rt/lib/sanitizer_common/.clang-tidy | 16 ---------------- compiler-rt/test/.clang-tidy | 1 + 3 files changed, 3 insertions(+), 18 deletions(-) delete mode 100644 compiler-rt/lib/sanitizer_common/.clang-tidy create mode 100644 compiler-rt/test/.clang-tidy diff --git a/compiler-rt/.clang-tidy b/compiler-rt/.clang-tidy index e949902171e7f..4bad5ef216200 100644 --- a/compiler-rt/.clang-tidy +++ b/compiler-rt/.clang-tidy @@ -1,2 +1,2 @@ -# Checks enabled in the top-level .clang-tidy minus readability-identifier-naming -Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes' +# Checks enabled in the top-level .clang-tidy minus readability-identifier-naming and llvm-header-guard. 
+Checks: '-*,clang-diagnostic-*,llvm-*,-llvm-header-guard,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes' diff --git a/compiler-rt/lib/sanitizer_common/.clang-tidy b/compiler-rt/lib/sanitizer_common/.clang-tidy deleted file mode 100644 index 6c71abff0d382..0000000000000 --- a/compiler-rt/lib/sanitizer_common/.clang-tidy +++ /dev/null @@ -1,16 +0,0 @@ -Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,readability-identifier-naming' -CheckOptions: - - key: readability-identifier-naming.ClassCase - value: CamelCase - - key: readability-identifier-naming.EnumCase - value: CamelCase - - key: readability-identifier-naming.FunctionCase - value: CamelCase - - key: readability-identifier-naming.UnionCase - value: CamelCase - - key: readability-identifier-naming.GlobalConstantCase - value: CamelCase - - key: readability-identifier-naming.GlobalConstantPrefix - value: "k" - - key: readability-identifier-naming.VariableCase - value: lower_case diff --git a/compiler-rt/test/.clang-tidy b/compiler-rt/test/.clang-tidy new file mode 100644 index 0000000000000..612bd0ee8de8a --- /dev/null +++ b/compiler-rt/test/.clang-tidy @@ -0,0 +1 @@ +Checks: '-*' From 51597cc37a2679ebd7f0236adabffbd94506a5f1 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 3 Sep 2020 18:54:52 -0700 Subject: [PATCH 266/465] [NFC][Asan] Rename internal enum value. New name better represents the state of chunk. --- compiler-rt/lib/asan/asan_allocator.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 0e9add1ce7370..a752156aa9bdc 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -106,8 +106,11 @@ COMPILER_CHECK(kChunkHeader2Size <= 16); // CHUNK_ALLOCATED: the chunk is allocated and not yet freed. // CHUNK_QUARANTINE: the chunk was freed and put into quarantine zone. 
enum { - CHUNK_AVAILABLE = 0, // 0 is the default value even if we didn't set it. - CHUNK_ALLOCATED = 2, + // Either just allocated by underlying allocator, but AsanChunk is not yet + // ready, or almost returned to undelying allocator and AsanChunk is already + // meaningless. + CHUNK_INVALID = 0, + CHUNK_ALLOCATED = 2, CHUNK_QUARANTINE = 3 }; @@ -141,7 +144,7 @@ struct QuarantineCallback { void Recycle(AsanChunk *m) { CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); - atomic_store((atomic_uint8_t*)m, CHUNK_AVAILABLE, memory_order_relaxed); + atomic_store((atomic_uint8_t *)m, CHUNK_INVALID, memory_order_relaxed); CHECK_NE(m->alloc_tid, kInvalidTid); CHECK_NE(m->free_tid, kInvalidTid); PoisonShadow(m->Beg(), @@ -843,7 +846,7 @@ static AsanAllocator &get_allocator() { } bool AsanChunkView::IsValid() const { - return chunk_ && chunk_->chunk_state != CHUNK_AVAILABLE; + return chunk_ && chunk_->chunk_state != CHUNK_INVALID; } bool AsanChunkView::IsAllocated() const { return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED; From 398c44ba84e10dec1224365065615b08be106e1f Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 4 Sep 2020 00:17:34 -0700 Subject: [PATCH 267/465] [NFC][Asan] Inline enum doc strings --- compiler-rt/lib/asan/asan_allocator.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index a752156aa9bdc..81c22e7a213ff 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -101,16 +101,14 @@ static const uptr kChunkHeader2Size = sizeof(ChunkBase) - kChunkHeaderSize; COMPILER_CHECK(kChunkHeaderSize == 16); COMPILER_CHECK(kChunkHeader2Size <= 16); -// Every chunk of memory allocated by this allocator can be in one of 3 states: -// CHUNK_AVAILABLE: the chunk is in the free list and ready to be allocated. -// CHUNK_ALLOCATED: the chunk is allocated and not yet freed. 
-// CHUNK_QUARANTINE: the chunk was freed and put into quarantine zone. enum { // Either just allocated by underlying allocator, but AsanChunk is not yet // ready, or almost returned to undelying allocator and AsanChunk is already // meaningless. CHUNK_INVALID = 0, + // The chunk is allocated and not yet freed. CHUNK_ALLOCATED = 2, + // The chunk was freed and put into quarantine zone. CHUNK_QUARANTINE = 3 }; From d5d09d93ee1494630654c487d9464771dba4e4d2 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 4 Sep 2020 01:17:18 -0700 Subject: [PATCH 268/465] [NFC][Asan] Reformat some allocator code --- compiler-rt/lib/asan/asan_allocator.cpp | 40 ++++++++++--------- .../sanitizer_allocator_combined.h | 1 - .../sanitizer_allocator_primary64.h | 9 +++-- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 81c22e7a213ff..d6792b38a69db 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -72,14 +72,14 @@ static const uptr kAllocBegMagic = 0xCC6E96B9; struct ChunkHeader { // 1-st 8 bytes. - u32 chunk_state : 8; // Must be first. - u32 alloc_tid : 24; - - u32 free_tid : 24; - u32 from_memalign : 1; - u32 alloc_type : 2; - u32 rz_log : 3; - u32 lsan_tag : 2; + u32 chunk_state : 8; // Must be first. + u32 alloc_tid : 24; + + u32 free_tid : 24; + u32 from_memalign : 1; + u32 alloc_type : 2; + u32 rz_log : 3; + u32 lsan_tag : 2; // 2-nd 8 bytes // This field is used for small sizes. For large sizes it is equal to // SizeClassMap::kMaxSize and the actual size is stored in the @@ -109,7 +109,7 @@ enum { // The chunk is allocated and not yet freed. CHUNK_ALLOCATED = 2, // The chunk was freed and put into quarantine zone. 
- CHUNK_QUARANTINE = 3 + CHUNK_QUARANTINE = 3, }; struct AsanChunk: ChunkBase { @@ -118,7 +118,7 @@ struct AsanChunk: ChunkBase { if (user_requested_size != SizeClassMap::kMaxSize) return user_requested_size; return *reinterpret_cast( - get_allocator().GetMetaData(AllocBeg(locked_version))); + get_allocator().GetMetaData(AllocBeg(locked_version))); } void *AllocBeg(bool locked_version = false) { if (from_memalign) { @@ -508,7 +508,7 @@ struct Allocator { m->free_tid = kInvalidTid; m->from_memalign = user_beg != beg_plus_redzone; if (alloc_beg != chunk_beg) { - CHECK_LE(alloc_beg+ 2 * sizeof(uptr), chunk_beg); + CHECK_LE(alloc_beg + 2 * sizeof(uptr), chunk_beg); reinterpret_cast(alloc_beg)[0] = kAllocBegMagic; reinterpret_cast(alloc_beg)[1] = chunk_beg; } @@ -722,7 +722,8 @@ struct Allocator { // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg). AsanChunk *GetAsanChunk(void *alloc_beg) { - if (!alloc_beg) return nullptr; + if (!alloc_beg) + return nullptr; if (!allocator.FromPrimary(alloc_beg)) { uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); AsanChunk *m = reinterpret_cast(meta[1]); @@ -738,11 +739,13 @@ struct Allocator { } AsanChunk *GetAsanChunkDebug(void *alloc_beg) { - if (!alloc_beg) return nullptr; + if (!alloc_beg) + return nullptr; if (!allocator.FromPrimary(alloc_beg)) { uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); AsanChunk *m = reinterpret_cast(meta[1]); - Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta, m); + Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta, + m); return m; } uptr *alloc_magic = reinterpret_cast(alloc_beg); @@ -755,7 +758,6 @@ struct Allocator { return reinterpret_cast(alloc_beg); } - AsanChunk *GetAsanChunkByAddr(uptr p) { void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast(p)); return GetAsanChunk(alloc_beg); @@ -771,7 +773,8 @@ struct Allocator { AsanChunk *GetAsanChunkByAddrFastLockedDebug(uptr p) { void *alloc_beg = 
allocator.GetBlockBeginFastLockedDebug(reinterpret_cast(p)); - Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p, alloc_beg); + Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p, + alloc_beg); return GetAsanChunkDebug(alloc_beg); } @@ -1036,7 +1039,7 @@ void AsanSoftRssLimitExceededCallback(bool limit_exceeded) { instance.SetRssLimitExceeded(limit_exceeded); } -} // namespace __asan +} // namespace __asan // --- Implementation of LSan-specific functions --- {{{1 namespace __lsan { @@ -1073,7 +1076,8 @@ extern "C" SANITIZER_WEAK_ATTRIBUTE const char *__lsan_current_stage; void GetUserBeginDebug(uptr chunk) { Printf("GetUserBeginDebug1 chunk %p\n", chunk); - __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); + __asan::AsanChunk *m = + __asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); Printf("GetUserBeginDebug2 m %p\n", m); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h index 6d73784d77d09..0cf483da1e5c8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h @@ -148,7 +148,6 @@ class CombinedAllocator { return secondary_.GetBlockBeginFastLocked(p); } - uptr GetActuallyAllocatedSize(void *p) { if (primary_.PointerIsMine(p)) return primary_.GetActuallyAllocatedSize(p); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h index 7af469c56fd6a..a6126fc6265eb 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h @@ -203,7 +203,8 @@ class SizeClassAllocator64 { uptr class_id = GetSizeClass(p); uptr size = ClassIdToSize(class_id); Printf("GetBlockBeginDebug1 p %p class_id %p size %p\n", p, class_id, size); - if (!size) return nullptr; 
+ if (!size) + return nullptr; uptr chunk_idx = GetChunkIdx((uptr)p, size); uptr reg_beg = GetRegionBegin(p); uptr beg = chunk_idx * size; @@ -212,16 +213,16 @@ class SizeClassAllocator64 { "GetBlockBeginDebug2 chunk_idx %p reg_beg %p beg %p next_beg %p " "kNumClasses %p\n", chunk_idx, reg_beg, beg, next_beg, kNumClasses); - if (class_id >= kNumClasses) return nullptr; + if (class_id >= kNumClasses) + return nullptr; const RegionInfo *region = AddressSpaceView::Load(GetRegionInfo(class_id)); Printf("GetBlockBeginDebug3 region %p region->mapped_user %p\n", region, region->mapped_user); if (region->mapped_user >= next_beg) - return reinterpret_cast(reg_beg + beg); + return reinterpret_cast(reg_beg + beg); return nullptr; } - uptr GetActuallyAllocatedSize(void *p) { CHECK(PointerIsMine(p)); return ClassIdToSize(GetSizeClass(p)); From 8789f2bbde8fe540a74d80d6e48eb466f8a9d6ea Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 5 Sep 2020 16:11:07 -0500 Subject: [PATCH 269/465] [Hexagon] Resize the mem operand when widening loads and stores --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 5 +-- .../Hexagon/autohvx/isel-widen-memop.ll | 36 +++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-widen-memop.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 0f365a313233d..a2420dae8afff 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1704,18 +1704,19 @@ SDValue HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); unsigned HwLen = Subtarget.getVectorLength(); + MachineFunction &MF = DAG.getMachineFunction(); auto *MaskN = cast(Op.getNode()); SDValue Mask = MaskN->getMask(); SDValue Chain = MaskN->getChain(); SDValue Base = MaskN->getBasePtr(); - auto *MemOp = MaskN->getMemOperand(); + auto *MemOp 
= MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen); unsigned Opc = Op->getOpcode(); assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE); if (Opc == ISD::MLOAD) { MVT ValTy = ty(Op); - SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MaskN->getMemOperand()); + SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp); SDValue Thru = cast(MaskN)->getPassThru(); if (isUndef(Thru)) return Load; diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-memop.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-memop.ll new file mode 100644 index 0000000000000..bed13b1dbcc98 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-memop.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check for successful compilation. +; CHECK-LABEL: f0: +; CHECK: dealloc_return + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dso_local void @f0(i16* %a0) local_unnamed_addr #0 { +b0: + %v0 = getelementptr i16, i16* %a0, i32 8 + %v1 = getelementptr i16, i16* %v0, i32 0 + %v2 = icmp eq i32 0, 0 + %v3 = insertelement <8 x i1> undef, i1 %v2, i64 0 + %v4 = shufflevector <8 x i1> %v3, <8 x i1> undef, <8 x i32> zeroinitializer + %v5 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull undef, i32 4, <8 x i1> %v4, <8 x i32> undef) + %v6 = sub nsw <8 x i32> zeroinitializer, %v5 + %v7 = add nsw <8 x i32> %v6, zeroinitializer + %v8 = add <8 x i32> zeroinitializer, %v7 + %v9 = lshr <8 x i32> %v8, + %v10 = trunc <8 x i32> %v9 to <8 x i16> + %v11 = bitcast i16* %v1 to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v10, <8 x i16>* %v11, i32 2, <8 x i1> %v4) + ret void +} + +; Function Attrs: argmemonly nounwind readonly willreturn +declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32 immarg, <8 x i1>, <8 x i32>) #1 + +; Function Attrs: argmemonly nounwind 
willreturn +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) #2 + +attributes #0 = { "target-features"="+hvx-length64b,+hvxv65,+v65,-long-calls,-packets" } +attributes #1 = { argmemonly nounwind readonly willreturn } +attributes #2 = { argmemonly nounwind willreturn } From 9518f032e49280d973dcae9e1ff460b1f98cc9df Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 5 Sep 2020 16:29:36 -0500 Subject: [PATCH 270/465] [Hexagon] When widening truncate result, also widen operand if necessary --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 34 ++++++++++++----- .../Hexagon/autohvx/isel-widen-truncate-op.ll | 37 +++++++++++++++++++ 2 files changed, 62 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-op.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index a2420dae8afff..604d13ee874bf 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1918,16 +1918,32 @@ HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); - MVT ResTy = ty(Op); unsigned HwWidth = 8*Subtarget.getVectorLength(); - unsigned ResWidth = ResTy.getSizeInBits(); - assert(HwWidth % ResWidth == 0); - - unsigned WideNumElem = ResTy.getVectorNumElements() * (HwWidth / ResWidth); - MVT WideTy = MVT::getVectorVT(ResTy.getVectorElementType(), WideNumElem); - SDValue WideOp = DAG.getNode(HexagonISD::VPACKL, dl, WideTy, - Op.getOperand(0)); - return WideOp; + + auto getFactor = [HwWidth](MVT Ty) { + unsigned Width = Ty.getSizeInBits(); + assert(HwWidth % Width == 0); + return HwWidth / Width; + }; + + auto getWideTy = [getFactor](MVT Ty) { + unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty); + return 
MVT::getVectorVT(Ty.getVectorElementType(), WideLen); + }; + + SDValue Op0 = Op.getOperand(0); + MVT ResTy = ty(Op); + MVT OpTy = ty(Op0); + if (Subtarget.isHVXVectorType(OpTy)) + return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0); + + MVT WideOpTy = getWideTy(OpTy); + SmallVector Concats = {Op0}; + for (int i = 0, e = getFactor(OpTy) - 1; i != e; ++i) + Concats.push_back(DAG.getUNDEF(OpTy)); + + SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideOpTy, Concats); + return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat); } SDValue diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-op.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-op.ll new file mode 100644 index 0000000000000..404d3d1ff2606 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-widen-truncate-op.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-hvx-widen=16 < %s | FileCheck %s + +; Check for successful compilation. 
+; CHECK-LABEL: f0: +; CHECK: vmemu + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dso_local void @f0(i16* %a0) local_unnamed_addr #0 { +b0: + %v0 = getelementptr i16, i16* %a0, i32 8 + %v1 = getelementptr i16, i16* %v0, i32 0 + %v2 = icmp eq i32 0, 0 + %v3 = insertelement <8 x i1> undef, i1 %v2, i64 0 + %v4 = shufflevector <8 x i1> %v3, <8 x i1> undef, <8 x i32> zeroinitializer + %v5 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* nonnull undef, i32 4, <8 x i1> %v4, <8 x i32> undef) + %v6 = sub nsw <8 x i32> zeroinitializer, %v5 + %v7 = add nsw <8 x i32> %v6, zeroinitializer + %v8 = add <8 x i32> zeroinitializer, %v7 + %v9 = lshr <8 x i32> %v8, + %v10 = trunc <8 x i32> %v9 to <8 x i16> + %v11 = bitcast i16* %v1 to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v10, <8 x i16>* %v11, i32 2, <8 x i1> %v4) + ret void +} + +; Function Attrs: argmemonly nounwind readonly willreturn +declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32 immarg, <8 x i1>, <8 x i32>) #1 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) #2 + +attributes #0 = { "target-features"="+hvx-length64b,+hvxv65,+v65,-long-calls,-packets" } +attributes #1 = { argmemonly nounwind readonly willreturn } +attributes #2 = { argmemonly nounwind willreturn } From 62f89a89f312045f26ec74b73512e2080df35875 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 5 Sep 2020 18:15:38 -0500 Subject: [PATCH 271/465] [Hexagon] Add assertions about V6_pred_scalar2 --- llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 604d13ee874bf..e5d05cfe64c47 100644 
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -631,6 +631,9 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl, if (!ZeroFill) return S; // Fill the bytes beyond BlockLen with 0s. + // V6_pred_scalar2 cannot fill the entire predicate, so it only works + // when BlockLen < HwLen. + assert(BlockLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); @@ -1094,6 +1097,7 @@ HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, // ByteVec is the target vector VecV rotated in such a way that the // subvector should be inserted at index 0. Generate a predicate mask // and use vmux to do the insertion. + assert(BlockLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); @@ -1906,6 +1910,7 @@ HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { } assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia + assert(ValueLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue StoreQ = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG); From aff94ec0f4ded3eff2d5c0431f5fec9e41731b05 Mon Sep 17 00:00:00 2001 From: vnalamot Date: Sun, 6 Sep 2020 07:00:36 +0530 Subject: [PATCH 272/465] [AMDGPU] Remove the dead spill slots while spilling FP/BP to memory During the PEI pass, the dead TargetStackID::SGPRSpill spill slots are not being removed while spilling the FP/BP to memory. 
Fixes: SWDEV-250393 Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D87032 --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 14 +++++++++----- llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll | 9 +++++---- llvm/test/CodeGen/AMDGPU/stack-realign.ll | 8 ++++---- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 9b795b22f5234..c947995fd3ee4 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -112,15 +112,19 @@ static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, // 3: There's no free lane to spill, and no free register to save FP/BP, // so we're forced to spill another VGPR to use for the spill. FrameIndex = NewFI; + + LLVM_DEBUG( + auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); + dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to " + << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';); } else { + // Remove dead index + MF.getFrameInfo().RemoveStackObject(NewFI); // 4: If all else fails, spill the FP/BP to memory. FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); + LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling " + << (IsFP ? "FP" : "BP") << '\n'); } - - LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); - dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to " - << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane - << '\n';); } else { LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? 
"FP" : "BP") << " with copy to " << printReg(TempSGPR, TRI) << '\n'); diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index 3016d99fc8c1c..e6980b895f590 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -463,15 +463,16 @@ define void @ipra_call_with_stack() #0 { ; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory: ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33 -; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:[[OFF:[0-9]+]] +; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:4 ; GCN: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN: s_mov_b32 s33, s32 ; GCN: s_or_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:[[OFF]] +; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:4 ; GCN: s_waitcnt vmcnt(0) ; GCN: v_readfirstlane_b32 s33, [[TMP_VGPR2]] ; GCN: s_mov_b64 exec, [[COPY_EXEC2]] ; GCN: s_setpc_b64 +; GCN: ScratchSize: 8 define void @callee_need_to_spill_fp_to_memory() #3 { call void asm sideeffect "; clobber nonpreserved SGPRs", "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} @@ -529,8 +530,8 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset ; GCN: s_or_saveexec_b64 s[4:5], -1 ; GCN: v_mov_b32_e32 v0, s33 -; GCN-NOT: v_mov_b32_e32 v0, 0x100c -; GCN-NEXT: v_mov_b32_e32 v1, 0x100c +; GCN-NOT: v_mov_b32_e32 v0, 0x1008 +; GCN-NEXT: v_mov_b32_e32 v1, 0x1008 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval align 4 %arg) #3 { %alloca = alloca i32, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index 
e8e3518aed1c2..3219c75c43a82 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -291,12 +291,12 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i ; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset ; GCN: s_or_saveexec_b64 s[4:5], -1 ; GCN: v_mov_b32_e32 v0, s33 -; GCN-NOT: v_mov_b32_e32 v0, 0x1088 -; GCN-NEXT: v_mov_b32_e32 v1, 0x1088 +; GCN-NOT: v_mov_b32_e32 v0, 0x1084 +; GCN-NEXT: v_mov_b32_e32 v1, 0x1084 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen ; GCN: v_mov_b32_e32 v0, s34 -; GCN-NOT: v_mov_b32_e32 v0, 0x1090 -; GCN-NEXT: v_mov_b32_e32 v1, 0x1090 +; GCN-NOT: v_mov_b32_e32 v0, 0x1088 +; GCN-NEXT: v_mov_b32_e32 v1, 0x1088 ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen %local_val = alloca i32, align 128, addrspace(5) store volatile i32 %b, i32 addrspace(5)* %local_val, align 128 From 9128f5c8a719095f12a8864a2b81395c70a321b3 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sat, 5 Sep 2020 16:52:48 -0700 Subject: [PATCH 273/465] [NFC][Asan] Move free_tid from ChunkHeader The goal to make chunk_state atomic, but we can't pack it with free_tid on Windows. --- compiler-rt/lib/asan/asan_allocator.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index d6792b38a69db..6755549c97220 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -71,11 +71,9 @@ static AsanAllocator &get_allocator(); static const uptr kAllocBegMagic = 0xCC6E96B9; struct ChunkHeader { - // 1-st 8 bytes. - u32 chunk_state : 8; // Must be first. + u8 chunk_state; // Must be first. 
+ u8 padding[3]; u32 alloc_tid : 24; - - u32 free_tid : 24; u32 from_memalign : 1; u32 alloc_type : 2; u32 rz_log : 3; @@ -94,6 +92,7 @@ struct ChunkHeader { struct ChunkBase : ChunkHeader { // Header2, intersects with user memory. u32 free_context_id; + u32 free_tid; }; static const uptr kChunkHeaderSize = sizeof(ChunkHeader); @@ -505,7 +504,6 @@ struct Allocator { u32 alloc_tid = t ? t->tid() : 0; m->alloc_tid = alloc_tid; CHECK_EQ(alloc_tid, m->alloc_tid); // Does alloc_tid fit into the bitfield? - m->free_tid = kInvalidTid; m->from_memalign = user_beg != beg_plus_redzone; if (alloc_beg != chunk_beg) { CHECK_LE(alloc_beg + 2 * sizeof(uptr), chunk_beg); @@ -577,6 +575,9 @@ struct Allocator { return false; } CHECK_EQ(CHUNK_ALLOCATED, old_chunk_state); + // It was a user data. + m->free_tid = kInvalidTid; + m->free_context_id = 0; return true; } @@ -584,9 +585,6 @@ struct Allocator { // AtomicallySetQuarantineFlagIfAllocated. void QuarantineChunk(AsanChunk *m, void *ptr, BufferedStackTrace *stack) { CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); - CHECK_GE(m->alloc_tid, 0); - if (SANITIZER_WORDSIZE == 64) // On 32-bits this resides in user area. - CHECK_EQ(m->free_tid, kInvalidTid); AsanThread *t = GetCurrentThread(); m->free_tid = t ? t->tid() : 0; m->free_context_id = StackDepotPut(*stack); @@ -862,7 +860,9 @@ u32 AsanChunkView::UserRequestedAlignment() const { return Allocator::ComputeUserAlignment(chunk_->user_requested_alignment_log); } uptr AsanChunkView::AllocTid() const { return chunk_->alloc_tid; } -uptr AsanChunkView::FreeTid() const { return chunk_->free_tid; } +uptr AsanChunkView::FreeTid() const { + return IsQuarantined() ? 
chunk_->free_tid : kInvalidTid; +} AllocType AsanChunkView::GetAllocType() const { return (AllocType)chunk_->alloc_type; } @@ -875,7 +875,9 @@ static StackTrace GetStackTraceFromId(u32 id) { } u32 AsanChunkView::GetAllocStackId() const { return chunk_->alloc_context_id; } -u32 AsanChunkView::GetFreeStackId() const { return chunk_->free_context_id; } +u32 AsanChunkView::GetFreeStackId() const { + return IsQuarantined() ? chunk_->free_context_id : 0; +} StackTrace AsanChunkView::GetAllocStack() const { return GetStackTraceFromId(GetAllocStackId()); From 289c08e4bc6b848a70d6cdefafa21a2e91e2c5f7 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sat, 5 Sep 2020 19:09:41 -0700 Subject: [PATCH 274/465] [NFC][Asan] Fix clang-tidy warning --- compiler-rt/lib/asan/asan_allocator.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 6755549c97220..a84506181362f 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -1142,9 +1142,8 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p) { return kIgnoreObjectAlreadyIgnored; m->lsan_tag = __lsan::kIgnored; return kIgnoreObjectSuccess; - } else { - return kIgnoreObjectInvalid; } + return kIgnoreObjectInvalid; } } // namespace __lsan From eb87e1dbcfdf15c0711146ff3e6b2e1e40c8863a Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sat, 5 Sep 2020 19:06:27 -0700 Subject: [PATCH 275/465] [Asan] Cleanup atomic usage in allocator There are no known bugs related to this, still it may fix some latent ones. Main concerns with preexisting code: 1. Inconsistent atomic/non-atomic access to the same field. 2. Assumption that bitfield chunk_state is always the first byte without even taking into account endianness. 
Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D86917 --- compiler-rt/lib/asan/asan_allocator.cpp | 115 ++++++++++++++---------- 1 file changed, 69 insertions(+), 46 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index a84506181362f..b41cfe2de467f 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -71,8 +71,7 @@ static AsanAllocator &get_allocator(); static const uptr kAllocBegMagic = 0xCC6E96B9; struct ChunkHeader { - u8 chunk_state; // Must be first. - u8 padding[3]; + atomic_uint32_t chunk_state; u32 alloc_tid : 24; u32 from_memalign : 1; u32 alloc_type : 2; @@ -86,7 +85,7 @@ struct ChunkHeader { // align < 8 -> 0 // else -> log2(min(align, 512)) - 2 u32 user_requested_alignment_log : 3; - u32 alloc_context_id; + atomic_uint32_t alloc_context_id; }; struct ChunkBase : ChunkHeader { @@ -140,8 +139,12 @@ struct QuarantineCallback { } void Recycle(AsanChunk *m) { - CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); - atomic_store((atomic_uint8_t *)m, CHUNK_INVALID, memory_order_relaxed); + u32 old_chunk_state = CHUNK_QUARANTINE; + if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, + CHUNK_INVALID, memory_order_acquire)) { + CHECK_EQ(old_chunk_state, CHUNK_QUARANTINE); + } + CHECK_NE(m->alloc_tid, kInvalidTid); CHECK_NE(m->free_tid, kInvalidTid); PoisonShadow(m->Beg(), @@ -301,22 +304,25 @@ struct Allocator { // housekeeping chunk, like TransferBatch. Start by assuming the former. AsanChunk *ac = GetAsanChunk((void *)chunk); uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)ac); - uptr beg = ac->Beg(); - uptr end = ac->Beg() + ac->UsedSize(true); - uptr chunk_end = chunk + allocated_size; - if (chunk < beg && beg < end && end <= chunk_end && - ac->chunk_state == CHUNK_ALLOCATED) { - // Looks like a valid AsanChunk in use, poison redzones only. 
- PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); - uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); - FastPoisonShadowPartialRightRedzone( - end_aligned_down, end - end_aligned_down, - chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); - } else { - // This is either not an AsanChunk or freed or quarantined AsanChunk. - // In either case, poison everything. - PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); + if (atomic_load(&ac->chunk_state, memory_order_acquire) == + CHUNK_ALLOCATED) { + uptr beg = ac->Beg(); + uptr end = ac->Beg() + ac->UsedSize(true); + uptr chunk_end = chunk + allocated_size; + if (chunk < beg && beg < end && end <= chunk_end) { + // Looks like a valid AsanChunk in use, poison redzones only. + PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); + uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); + FastPoisonShadowPartialRightRedzone( + end_aligned_down, end - end_aligned_down, + chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); + return; + } } + + // This is either not an AsanChunk or freed or quarantined AsanChunk. + // In either case, poison everything. + PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); } void ReInitialize(const AllocatorOptions &options) { @@ -381,14 +387,18 @@ struct Allocator { AsanChunk *right_chunk) { // Prefer an allocated chunk over freed chunk and freed chunk // over available chunk. 
- if (left_chunk->chunk_state != right_chunk->chunk_state) { - if (left_chunk->chunk_state == CHUNK_ALLOCATED) + u32 left_state = + atomic_load(&left_chunk->chunk_state, memory_order_relaxed); + u32 right_state = + atomic_load(&right_chunk->chunk_state, memory_order_relaxed); + if (left_state != right_state) { + if (left_state == CHUNK_ALLOCATED) return left_chunk; - if (right_chunk->chunk_state == CHUNK_ALLOCATED) + if (right_state == CHUNK_ALLOCATED) return right_chunk; - if (left_chunk->chunk_state == CHUNK_QUARANTINE) + if (left_state == CHUNK_QUARANTINE) return left_chunk; - if (right_chunk->chunk_state == CHUNK_QUARANTINE) + if (right_state == CHUNK_QUARANTINE) return right_chunk; } // Same chunk_state: choose based on offset. @@ -403,9 +413,10 @@ struct Allocator { bool UpdateAllocationStack(uptr addr, BufferedStackTrace *stack) { AsanChunk *m = GetAsanChunkByAddr(addr); if (!m) return false; - if (m->chunk_state != CHUNK_ALLOCATED) return false; + if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) + return false; if (m->Beg() != addr) return false; - atomic_store((atomic_uint32_t *)&m->alloc_context_id, StackDepotPut(*stack), + atomic_store(&m->alloc_context_id, StackDepotPut(*stack), memory_order_relaxed); return true; } @@ -523,7 +534,8 @@ struct Allocator { } m->user_requested_alignment_log = user_requested_alignment_log; - m->alloc_context_id = StackDepotPut(*stack); + atomic_store(&m->alloc_context_id, StackDepotPut(*stack), + memory_order_relaxed); uptr size_rounded_down_to_granularity = RoundDownTo(size, SHADOW_GRANULARITY); @@ -556,7 +568,7 @@ struct Allocator { : __lsan::kDirectlyLeaked; #endif // Must be the last mutation of metadata in this function. 
- atomic_store((atomic_uint8_t *)m, CHUNK_ALLOCATED, memory_order_release); + atomic_store(&m->chunk_state, CHUNK_ALLOCATED, memory_order_release); ASAN_MALLOC_HOOK(res, size); return res; } @@ -564,10 +576,10 @@ struct Allocator { // Set quarantine flag if chunk is allocated, issue ASan error report on // available and quarantined chunks. Return true on success, false otherwise. bool AtomicallySetQuarantineFlagIfAllocated(AsanChunk *m, void *ptr, - BufferedStackTrace *stack) { - u8 old_chunk_state = CHUNK_ALLOCATED; + BufferedStackTrace *stack) { + u32 old_chunk_state = CHUNK_ALLOCATED; // Flip the chunk_state atomically to avoid race on double-free. - if (!atomic_compare_exchange_strong((atomic_uint8_t *)m, &old_chunk_state, + if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, CHUNK_QUARANTINE, memory_order_acquire)) { ReportInvalidFree(ptr, old_chunk_state, stack); @@ -584,7 +596,8 @@ struct Allocator { // Expects the chunk to already be marked as quarantined by using // AtomicallySetQuarantineFlagIfAllocated. void QuarantineChunk(AsanChunk *m, void *ptr, BufferedStackTrace *stack) { - CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); + CHECK_EQ(atomic_load(&m->chunk_state, memory_order_relaxed), + CHUNK_QUARANTINE); AsanThread *t = GetCurrentThread(); m->free_tid = t ? 
t->tid() : 0; m->free_context_id = StackDepotPut(*stack); @@ -676,7 +689,7 @@ struct Allocator { void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC, true); if (new_ptr) { - u8 chunk_state = m->chunk_state; + u32 chunk_state = atomic_load(&m->chunk_state, memory_order_acquire); if (chunk_state != CHUNK_ALLOCATED) ReportInvalidFree(old_ptr, chunk_state, stack); CHECK_NE(REAL(memcpy), nullptr); @@ -703,7 +716,8 @@ struct Allocator { return ptr; } - void ReportInvalidFree(void *ptr, u8 chunk_state, BufferedStackTrace *stack) { + void ReportInvalidFree(void *ptr, u32 chunk_state, + BufferedStackTrace *stack) { if (chunk_state == CHUNK_QUARANTINE) ReportDoubleFree((uptr)ptr, stack); else @@ -779,7 +793,8 @@ struct Allocator { uptr AllocationSize(uptr p) { AsanChunk *m = GetAsanChunkByAddr(p); if (!m) return 0; - if (m->chunk_state != CHUNK_ALLOCATED) return 0; + if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) + return 0; if (m->Beg() != p) return 0; return m->UsedSize(); } @@ -845,13 +860,16 @@ static AsanAllocator &get_allocator() { } bool AsanChunkView::IsValid() const { - return chunk_ && chunk_->chunk_state != CHUNK_INVALID; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) != + CHUNK_INVALID; } bool AsanChunkView::IsAllocated() const { - return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == + CHUNK_ALLOCATED; } bool AsanChunkView::IsQuarantined() const { - return chunk_ && chunk_->chunk_state == CHUNK_QUARANTINE; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == + CHUNK_QUARANTINE; } uptr AsanChunkView::Beg() const { return chunk_->Beg(); } uptr AsanChunkView::End() const { return Beg() + UsedSize(); } @@ -874,7 +892,9 @@ static StackTrace GetStackTraceFromId(u32 id) { return res; } -u32 AsanChunkView::GetAllocStackId() const { return chunk_->alloc_context_id; } +u32 AsanChunkView::GetAllocStackId() 
const { + return atomic_load(&chunk_->alloc_context_id, memory_order_relaxed); +} u32 AsanChunkView::GetFreeStackId() const { return IsQuarantined() ? chunk_->free_context_id : 0; } @@ -1061,10 +1081,10 @@ void GetAllocatorGlobalRange(uptr *begin, uptr *end) { uptr PointsIntoChunk(void* p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(addr); - if (!m) return 0; - uptr chunk = m->Beg(); - if (m->chunk_state != __asan::CHUNK_ALLOCATED) + if (!m || atomic_load(&m->chunk_state, memory_order_acquire) != + __asan::CHUNK_ALLOCATED) return 0; + uptr chunk = m->Beg(); if (m->AddrIsInside(addr, /*locked_version=*/true)) return chunk; if (IsSpecialCaseOfOperatorNew0(chunk, m->UsedSize(/*locked_version*/ true), @@ -1106,7 +1126,8 @@ LsanMetadata::LsanMetadata(uptr chunk) { bool LsanMetadata::allocated() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return m->chunk_state == __asan::CHUNK_ALLOCATED; + return atomic_load(&m->chunk_state, memory_order_relaxed) == + __asan::CHUNK_ALLOCATED; } ChunkTag LsanMetadata::tag() const { @@ -1126,7 +1147,7 @@ uptr LsanMetadata::requested_size() const { u32 LsanMetadata::stack_trace_id() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return m->alloc_context_id; + return atomic_load(&m->alloc_context_id, memory_order_relaxed); } void ForEachChunk(ForEachChunkCallback callback, void *arg) { @@ -1137,7 +1158,9 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddr(addr); if (!m) return kIgnoreObjectInvalid; - if ((m->chunk_state == __asan::CHUNK_ALLOCATED) && m->AddrIsInside(addr)) { + if ((atomic_load(&m->chunk_state, memory_order_acquire) == + __asan::CHUNK_ALLOCATED) && + m->AddrIsInside(addr)) { if (m->lsan_tag == kIgnored) return kIgnoreObjectAlreadyIgnored; m->lsan_tag = __lsan::kIgnored; From 
d0abc757495349fd053beeaea81cd954c2e457e7 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Sat, 5 Sep 2020 21:00:15 -0700 Subject: [PATCH 276/465] [GlobalISel] Disable the indexed loads combine completely unless forced. NFC. The post-index matcher, before it queries the target legality, walks uses of some instructions which in pathological cases can be massive. Since no targets actually support indexed loads yet, disable this to stop wasting compile time on something which is going to fail anyway. --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 5dbd09670feaf..6a89060805e09 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -697,7 +697,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, return false; LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); - + // FIXME: The following use traversal needs a bail out for pathological cases. for (auto &Use : MRI.use_nodbg_instructions(Base)) { if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) continue; @@ -824,6 +824,11 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) return false; + // For now, no targets actually support these opcodes so don't waste time + // running these unless we're forced to for testing. 
+ if (!ForceLegalIndexing) + return false; + MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, MatchInfo.Offset); if (!MatchInfo.IsPre && From b2e27a86c18e13043be0ed7bf2855d313cc0ac38 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sat, 5 Sep 2020 23:41:25 -0700 Subject: [PATCH 277/465] Revert "[Asan] Cleanup atomic usage in allocator" Crashes on PPC This reverts commit eb87e1dbcfdf15c0711146ff3e6b2e1e40c8863a. --- compiler-rt/lib/asan/asan_allocator.cpp | 115 ++++++++++-------------- 1 file changed, 46 insertions(+), 69 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index b41cfe2de467f..a84506181362f 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -71,7 +71,8 @@ static AsanAllocator &get_allocator(); static const uptr kAllocBegMagic = 0xCC6E96B9; struct ChunkHeader { - atomic_uint32_t chunk_state; + u8 chunk_state; // Must be first. + u8 padding[3]; u32 alloc_tid : 24; u32 from_memalign : 1; u32 alloc_type : 2; @@ -85,7 +86,7 @@ struct ChunkHeader { // align < 8 -> 0 // else -> log2(min(align, 512)) - 2 u32 user_requested_alignment_log : 3; - atomic_uint32_t alloc_context_id; + u32 alloc_context_id; }; struct ChunkBase : ChunkHeader { @@ -139,12 +140,8 @@ struct QuarantineCallback { } void Recycle(AsanChunk *m) { - u32 old_chunk_state = CHUNK_QUARANTINE; - if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, - CHUNK_INVALID, memory_order_acquire)) { - CHECK_EQ(old_chunk_state, CHUNK_QUARANTINE); - } - + CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); + atomic_store((atomic_uint8_t *)m, CHUNK_INVALID, memory_order_relaxed); CHECK_NE(m->alloc_tid, kInvalidTid); CHECK_NE(m->free_tid, kInvalidTid); PoisonShadow(m->Beg(), @@ -304,25 +301,22 @@ struct Allocator { // housekeeping chunk, like TransferBatch. Start by assuming the former. 
AsanChunk *ac = GetAsanChunk((void *)chunk); uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)ac); - if (atomic_load(&ac->chunk_state, memory_order_acquire) == - CHUNK_ALLOCATED) { - uptr beg = ac->Beg(); - uptr end = ac->Beg() + ac->UsedSize(true); - uptr chunk_end = chunk + allocated_size; - if (chunk < beg && beg < end && end <= chunk_end) { - // Looks like a valid AsanChunk in use, poison redzones only. - PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); - uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); - FastPoisonShadowPartialRightRedzone( - end_aligned_down, end - end_aligned_down, - chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); - return; - } + uptr beg = ac->Beg(); + uptr end = ac->Beg() + ac->UsedSize(true); + uptr chunk_end = chunk + allocated_size; + if (chunk < beg && beg < end && end <= chunk_end && + ac->chunk_state == CHUNK_ALLOCATED) { + // Looks like a valid AsanChunk in use, poison redzones only. + PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); + uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); + FastPoisonShadowPartialRightRedzone( + end_aligned_down, end - end_aligned_down, + chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); + } else { + // This is either not an AsanChunk or freed or quarantined AsanChunk. + // In either case, poison everything. + PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); } - - // This is either not an AsanChunk or freed or quarantined AsanChunk. - // In either case, poison everything. - PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); } void ReInitialize(const AllocatorOptions &options) { @@ -387,18 +381,14 @@ struct Allocator { AsanChunk *right_chunk) { // Prefer an allocated chunk over freed chunk and freed chunk // over available chunk. 
- u32 left_state = - atomic_load(&left_chunk->chunk_state, memory_order_relaxed); - u32 right_state = - atomic_load(&right_chunk->chunk_state, memory_order_relaxed); - if (left_state != right_state) { - if (left_state == CHUNK_ALLOCATED) + if (left_chunk->chunk_state != right_chunk->chunk_state) { + if (left_chunk->chunk_state == CHUNK_ALLOCATED) return left_chunk; - if (right_state == CHUNK_ALLOCATED) + if (right_chunk->chunk_state == CHUNK_ALLOCATED) return right_chunk; - if (left_state == CHUNK_QUARANTINE) + if (left_chunk->chunk_state == CHUNK_QUARANTINE) return left_chunk; - if (right_state == CHUNK_QUARANTINE) + if (right_chunk->chunk_state == CHUNK_QUARANTINE) return right_chunk; } // Same chunk_state: choose based on offset. @@ -413,10 +403,9 @@ struct Allocator { bool UpdateAllocationStack(uptr addr, BufferedStackTrace *stack) { AsanChunk *m = GetAsanChunkByAddr(addr); if (!m) return false; - if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) - return false; + if (m->chunk_state != CHUNK_ALLOCATED) return false; if (m->Beg() != addr) return false; - atomic_store(&m->alloc_context_id, StackDepotPut(*stack), + atomic_store((atomic_uint32_t *)&m->alloc_context_id, StackDepotPut(*stack), memory_order_relaxed); return true; } @@ -534,8 +523,7 @@ struct Allocator { } m->user_requested_alignment_log = user_requested_alignment_log; - atomic_store(&m->alloc_context_id, StackDepotPut(*stack), - memory_order_relaxed); + m->alloc_context_id = StackDepotPut(*stack); uptr size_rounded_down_to_granularity = RoundDownTo(size, SHADOW_GRANULARITY); @@ -568,7 +556,7 @@ struct Allocator { : __lsan::kDirectlyLeaked; #endif // Must be the last mutation of metadata in this function. 
- atomic_store(&m->chunk_state, CHUNK_ALLOCATED, memory_order_release); + atomic_store((atomic_uint8_t *)m, CHUNK_ALLOCATED, memory_order_release); ASAN_MALLOC_HOOK(res, size); return res; } @@ -576,10 +564,10 @@ struct Allocator { // Set quarantine flag if chunk is allocated, issue ASan error report on // available and quarantined chunks. Return true on success, false otherwise. bool AtomicallySetQuarantineFlagIfAllocated(AsanChunk *m, void *ptr, - BufferedStackTrace *stack) { - u32 old_chunk_state = CHUNK_ALLOCATED; + BufferedStackTrace *stack) { + u8 old_chunk_state = CHUNK_ALLOCATED; // Flip the chunk_state atomically to avoid race on double-free. - if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, + if (!atomic_compare_exchange_strong((atomic_uint8_t *)m, &old_chunk_state, CHUNK_QUARANTINE, memory_order_acquire)) { ReportInvalidFree(ptr, old_chunk_state, stack); @@ -596,8 +584,7 @@ struct Allocator { // Expects the chunk to already be marked as quarantined by using // AtomicallySetQuarantineFlagIfAllocated. void QuarantineChunk(AsanChunk *m, void *ptr, BufferedStackTrace *stack) { - CHECK_EQ(atomic_load(&m->chunk_state, memory_order_relaxed), - CHUNK_QUARANTINE); + CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); AsanThread *t = GetCurrentThread(); m->free_tid = t ? 
t->tid() : 0; m->free_context_id = StackDepotPut(*stack); @@ -689,7 +676,7 @@ struct Allocator { void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC, true); if (new_ptr) { - u32 chunk_state = atomic_load(&m->chunk_state, memory_order_acquire); + u8 chunk_state = m->chunk_state; if (chunk_state != CHUNK_ALLOCATED) ReportInvalidFree(old_ptr, chunk_state, stack); CHECK_NE(REAL(memcpy), nullptr); @@ -716,8 +703,7 @@ struct Allocator { return ptr; } - void ReportInvalidFree(void *ptr, u32 chunk_state, - BufferedStackTrace *stack) { + void ReportInvalidFree(void *ptr, u8 chunk_state, BufferedStackTrace *stack) { if (chunk_state == CHUNK_QUARANTINE) ReportDoubleFree((uptr)ptr, stack); else @@ -793,8 +779,7 @@ struct Allocator { uptr AllocationSize(uptr p) { AsanChunk *m = GetAsanChunkByAddr(p); if (!m) return 0; - if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) - return 0; + if (m->chunk_state != CHUNK_ALLOCATED) return 0; if (m->Beg() != p) return 0; return m->UsedSize(); } @@ -860,16 +845,13 @@ static AsanAllocator &get_allocator() { } bool AsanChunkView::IsValid() const { - return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) != - CHUNK_INVALID; + return chunk_ && chunk_->chunk_state != CHUNK_INVALID; } bool AsanChunkView::IsAllocated() const { - return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == - CHUNK_ALLOCATED; + return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED; } bool AsanChunkView::IsQuarantined() const { - return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == - CHUNK_QUARANTINE; + return chunk_ && chunk_->chunk_state == CHUNK_QUARANTINE; } uptr AsanChunkView::Beg() const { return chunk_->Beg(); } uptr AsanChunkView::End() const { return Beg() + UsedSize(); } @@ -892,9 +874,7 @@ static StackTrace GetStackTraceFromId(u32 id) { return res; } -u32 AsanChunkView::GetAllocStackId() const { - return atomic_load(&chunk_->alloc_context_id, memory_order_relaxed); -} 
+u32 AsanChunkView::GetAllocStackId() const { return chunk_->alloc_context_id; } u32 AsanChunkView::GetFreeStackId() const { return IsQuarantined() ? chunk_->free_context_id : 0; } @@ -1081,10 +1061,10 @@ void GetAllocatorGlobalRange(uptr *begin, uptr *end) { uptr PointsIntoChunk(void* p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(addr); - if (!m || atomic_load(&m->chunk_state, memory_order_acquire) != - __asan::CHUNK_ALLOCATED) - return 0; + if (!m) return 0; uptr chunk = m->Beg(); + if (m->chunk_state != __asan::CHUNK_ALLOCATED) + return 0; if (m->AddrIsInside(addr, /*locked_version=*/true)) return chunk; if (IsSpecialCaseOfOperatorNew0(chunk, m->UsedSize(/*locked_version*/ true), @@ -1126,8 +1106,7 @@ LsanMetadata::LsanMetadata(uptr chunk) { bool LsanMetadata::allocated() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return atomic_load(&m->chunk_state, memory_order_relaxed) == - __asan::CHUNK_ALLOCATED; + return m->chunk_state == __asan::CHUNK_ALLOCATED; } ChunkTag LsanMetadata::tag() const { @@ -1147,7 +1126,7 @@ uptr LsanMetadata::requested_size() const { u32 LsanMetadata::stack_trace_id() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return atomic_load(&m->alloc_context_id, memory_order_relaxed); + return m->alloc_context_id; } void ForEachChunk(ForEachChunkCallback callback, void *arg) { @@ -1158,9 +1137,7 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddr(addr); if (!m) return kIgnoreObjectInvalid; - if ((atomic_load(&m->chunk_state, memory_order_acquire) == - __asan::CHUNK_ALLOCATED) && - m->AddrIsInside(addr)) { + if ((m->chunk_state == __asan::CHUNK_ALLOCATED) && m->AddrIsInside(addr)) { if (m->lsan_tag == kIgnored) return kIgnoreObjectAlreadyIgnored; m->lsan_tag = __lsan::kIgnored; From 
b11db3606ca180521f8400aed730281108b350b4 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sun, 6 Sep 2020 00:07:14 -0700 Subject: [PATCH 278/465] [Asan] Cleanup atomic usage in allocator There are no known bugs related to this, still it may fix some latent ones. Main concerns with preexisting code: 1. Inconsistent atomic/non-atomic access to the same field. 2. Assumption that bitfield chunk_state is always the first byte without even taking into account endianness. Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D86917 --- compiler-rt/lib/asan/asan_allocator.cpp | 120 ++++++++++++++---------- 1 file changed, 70 insertions(+), 50 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index a84506181362f..7334b7200fc4c 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -71,14 +71,11 @@ static AsanAllocator &get_allocator(); static const uptr kAllocBegMagic = 0xCC6E96B9; struct ChunkHeader { - u8 chunk_state; // Must be first. - u8 padding[3]; - u32 alloc_tid : 24; - u32 from_memalign : 1; - u32 alloc_type : 2; - u32 rz_log : 3; - u32 lsan_tag : 2; - // 2-nd 8 bytes + atomic_uint8_t chunk_state; + u8 from_memalign : 1; + u8 alloc_type : 2; + u8 rz_log : 3; + u8 lsan_tag : 2; // This field is used for small sizes. For large sizes it is equal to // SizeClassMap::kMaxSize and the actual size is stored in the // SecondaryAllocator's metadata. 
@@ -86,7 +83,8 @@ struct ChunkHeader { // align < 8 -> 0 // else -> log2(min(align, 512)) - 2 u32 user_requested_alignment_log : 3; - u32 alloc_context_id; + u32 alloc_tid; + atomic_uint32_t alloc_context_id; }; struct ChunkBase : ChunkHeader { @@ -140,8 +138,12 @@ struct QuarantineCallback { } void Recycle(AsanChunk *m) { - CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); - atomic_store((atomic_uint8_t *)m, CHUNK_INVALID, memory_order_relaxed); + u8 old_chunk_state = CHUNK_QUARANTINE; + if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, + CHUNK_INVALID, memory_order_acquire)) { + CHECK_EQ(old_chunk_state, CHUNK_QUARANTINE); + } + CHECK_NE(m->alloc_tid, kInvalidTid); CHECK_NE(m->free_tid, kInvalidTid); PoisonShadow(m->Beg(), @@ -301,22 +303,25 @@ struct Allocator { // housekeeping chunk, like TransferBatch. Start by assuming the former. AsanChunk *ac = GetAsanChunk((void *)chunk); uptr allocated_size = allocator.GetActuallyAllocatedSize((void *)ac); - uptr beg = ac->Beg(); - uptr end = ac->Beg() + ac->UsedSize(true); - uptr chunk_end = chunk + allocated_size; - if (chunk < beg && beg < end && end <= chunk_end && - ac->chunk_state == CHUNK_ALLOCATED) { - // Looks like a valid AsanChunk in use, poison redzones only. - PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); - uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); - FastPoisonShadowPartialRightRedzone( - end_aligned_down, end - end_aligned_down, - chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); - } else { - // This is either not an AsanChunk or freed or quarantined AsanChunk. - // In either case, poison everything. 
- PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); + if (atomic_load(&ac->chunk_state, memory_order_acquire) == + CHUNK_ALLOCATED) { + uptr beg = ac->Beg(); + uptr end = ac->Beg() + ac->UsedSize(true); + uptr chunk_end = chunk + allocated_size; + if (chunk < beg && beg < end && end <= chunk_end) { + // Looks like a valid AsanChunk in use, poison redzones only. + PoisonShadow(chunk, beg - chunk, kAsanHeapLeftRedzoneMagic); + uptr end_aligned_down = RoundDownTo(end, SHADOW_GRANULARITY); + FastPoisonShadowPartialRightRedzone( + end_aligned_down, end - end_aligned_down, + chunk_end - end_aligned_down, kAsanHeapLeftRedzoneMagic); + return; + } } + + // This is either not an AsanChunk or freed or quarantined AsanChunk. + // In either case, poison everything. + PoisonShadow(chunk, allocated_size, kAsanHeapLeftRedzoneMagic); } void ReInitialize(const AllocatorOptions &options) { @@ -381,14 +386,17 @@ struct Allocator { AsanChunk *right_chunk) { // Prefer an allocated chunk over freed chunk and freed chunk // over available chunk. - if (left_chunk->chunk_state != right_chunk->chunk_state) { - if (left_chunk->chunk_state == CHUNK_ALLOCATED) + u8 left_state = atomic_load(&left_chunk->chunk_state, memory_order_relaxed); + u8 right_state = + atomic_load(&right_chunk->chunk_state, memory_order_relaxed); + if (left_state != right_state) { + if (left_state == CHUNK_ALLOCATED) return left_chunk; - if (right_chunk->chunk_state == CHUNK_ALLOCATED) + if (right_state == CHUNK_ALLOCATED) return right_chunk; - if (left_chunk->chunk_state == CHUNK_QUARANTINE) + if (left_state == CHUNK_QUARANTINE) return left_chunk; - if (right_chunk->chunk_state == CHUNK_QUARANTINE) + if (right_state == CHUNK_QUARANTINE) return right_chunk; } // Same chunk_state: choose based on offset. 
@@ -403,9 +411,10 @@ struct Allocator { bool UpdateAllocationStack(uptr addr, BufferedStackTrace *stack) { AsanChunk *m = GetAsanChunkByAddr(addr); if (!m) return false; - if (m->chunk_state != CHUNK_ALLOCATED) return false; + if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) + return false; if (m->Beg() != addr) return false; - atomic_store((atomic_uint32_t *)&m->alloc_context_id, StackDepotPut(*stack), + atomic_store(&m->alloc_context_id, StackDepotPut(*stack), memory_order_relaxed); return true; } @@ -523,7 +532,8 @@ struct Allocator { } m->user_requested_alignment_log = user_requested_alignment_log; - m->alloc_context_id = StackDepotPut(*stack); + atomic_store(&m->alloc_context_id, StackDepotPut(*stack), + memory_order_relaxed); uptr size_rounded_down_to_granularity = RoundDownTo(size, SHADOW_GRANULARITY); @@ -556,7 +566,7 @@ struct Allocator { : __lsan::kDirectlyLeaked; #endif // Must be the last mutation of metadata in this function. - atomic_store((atomic_uint8_t *)m, CHUNK_ALLOCATED, memory_order_release); + atomic_store(&m->chunk_state, CHUNK_ALLOCATED, memory_order_release); ASAN_MALLOC_HOOK(res, size); return res; } @@ -564,10 +574,10 @@ struct Allocator { // Set quarantine flag if chunk is allocated, issue ASan error report on // available and quarantined chunks. Return true on success, false otherwise. bool AtomicallySetQuarantineFlagIfAllocated(AsanChunk *m, void *ptr, - BufferedStackTrace *stack) { + BufferedStackTrace *stack) { u8 old_chunk_state = CHUNK_ALLOCATED; // Flip the chunk_state atomically to avoid race on double-free. - if (!atomic_compare_exchange_strong((atomic_uint8_t *)m, &old_chunk_state, + if (!atomic_compare_exchange_strong(&m->chunk_state, &old_chunk_state, CHUNK_QUARANTINE, memory_order_acquire)) { ReportInvalidFree(ptr, old_chunk_state, stack); @@ -584,7 +594,8 @@ struct Allocator { // Expects the chunk to already be marked as quarantined by using // AtomicallySetQuarantineFlagIfAllocated. 
void QuarantineChunk(AsanChunk *m, void *ptr, BufferedStackTrace *stack) { - CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE); + CHECK_EQ(atomic_load(&m->chunk_state, memory_order_relaxed), + CHUNK_QUARANTINE); AsanThread *t = GetCurrentThread(); m->free_tid = t ? t->tid() : 0; m->free_context_id = StackDepotPut(*stack); @@ -676,7 +687,7 @@ struct Allocator { void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC, true); if (new_ptr) { - u8 chunk_state = m->chunk_state; + u8 chunk_state = atomic_load(&m->chunk_state, memory_order_acquire); if (chunk_state != CHUNK_ALLOCATED) ReportInvalidFree(old_ptr, chunk_state, stack); CHECK_NE(REAL(memcpy), nullptr); @@ -779,7 +790,8 @@ struct Allocator { uptr AllocationSize(uptr p) { AsanChunk *m = GetAsanChunkByAddr(p); if (!m) return 0; - if (m->chunk_state != CHUNK_ALLOCATED) return 0; + if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) + return 0; if (m->Beg() != p) return 0; return m->UsedSize(); } @@ -845,13 +857,16 @@ static AsanAllocator &get_allocator() { } bool AsanChunkView::IsValid() const { - return chunk_ && chunk_->chunk_state != CHUNK_INVALID; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) != + CHUNK_INVALID; } bool AsanChunkView::IsAllocated() const { - return chunk_ && chunk_->chunk_state == CHUNK_ALLOCATED; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == + CHUNK_ALLOCATED; } bool AsanChunkView::IsQuarantined() const { - return chunk_ && chunk_->chunk_state == CHUNK_QUARANTINE; + return chunk_ && atomic_load(&chunk_->chunk_state, memory_order_relaxed) == + CHUNK_QUARANTINE; } uptr AsanChunkView::Beg() const { return chunk_->Beg(); } uptr AsanChunkView::End() const { return Beg() + UsedSize(); } @@ -874,7 +889,9 @@ static StackTrace GetStackTraceFromId(u32 id) { return res; } -u32 AsanChunkView::GetAllocStackId() const { return chunk_->alloc_context_id; } +u32 AsanChunkView::GetAllocStackId() const { + return 
atomic_load(&chunk_->alloc_context_id, memory_order_relaxed); +} u32 AsanChunkView::GetFreeStackId() const { return IsQuarantined() ? chunk_->free_context_id : 0; } @@ -1061,10 +1078,10 @@ void GetAllocatorGlobalRange(uptr *begin, uptr *end) { uptr PointsIntoChunk(void* p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(addr); - if (!m) return 0; - uptr chunk = m->Beg(); - if (m->chunk_state != __asan::CHUNK_ALLOCATED) + if (!m || atomic_load(&m->chunk_state, memory_order_acquire) != + __asan::CHUNK_ALLOCATED) return 0; + uptr chunk = m->Beg(); if (m->AddrIsInside(addr, /*locked_version=*/true)) return chunk; if (IsSpecialCaseOfOperatorNew0(chunk, m->UsedSize(/*locked_version*/ true), @@ -1106,7 +1123,8 @@ LsanMetadata::LsanMetadata(uptr chunk) { bool LsanMetadata::allocated() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return m->chunk_state == __asan::CHUNK_ALLOCATED; + return atomic_load(&m->chunk_state, memory_order_relaxed) == + __asan::CHUNK_ALLOCATED; } ChunkTag LsanMetadata::tag() const { @@ -1126,7 +1144,7 @@ uptr LsanMetadata::requested_size() const { u32 LsanMetadata::stack_trace_id() const { __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); - return m->alloc_context_id; + return atomic_load(&m->alloc_context_id, memory_order_relaxed); } void ForEachChunk(ForEachChunkCallback callback, void *arg) { @@ -1137,7 +1155,9 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p) { uptr addr = reinterpret_cast(p); __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddr(addr); if (!m) return kIgnoreObjectInvalid; - if ((m->chunk_state == __asan::CHUNK_ALLOCATED) && m->AddrIsInside(addr)) { + if ((atomic_load(&m->chunk_state, memory_order_acquire) == + __asan::CHUNK_ALLOCATED) && + m->AddrIsInside(addr)) { if (m->lsan_tag == kIgnored) return kIgnoreObjectAlreadyIgnored; m->lsan_tag = __lsan::kIgnored; From ff218cbc84ff3783cb5ad030397adef8c9e8d444 
Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 21:30:55 +0200 Subject: [PATCH 279/465] [InstSimplify] Fold degenerate abs of abs form This addresses the remaining issue from D87188. Due to a series of folds, we may end up with abs-of-abs represented as x == 0 ? -abs(x) : abs(x). Rather than recognizing this as a special abs pattern and doing an abs-of-abs fold on it afterwards, I'm directly folding this to one of the select operands in InstSimplify. The general pattern falls into the "select with operand replaced" category, but that fold is not powerful enough to recognize that both hands of the select are the same for value zero. Differential Revision: https://reviews.llvm.org/D87197 --- llvm/lib/Analysis/InstructionSimplify.cpp | 9 +++++++++ .../Transforms/InstSimplify/abs_intrinsic.ll | 18 ++++-------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 3139b5a96b27d..7c13b41bc7e64 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3965,6 +3965,15 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt && Pred == ICmpInst::ICMP_EQ) return FalseVal; + + // X == 0 ? abs(X) : -abs(X) --> -abs(X) + // X == 0 ? -abs(X) : abs(X) --> abs(X) + if (match(TrueVal, m_Intrinsic(m_Value(X))) && + match(FalseVal, m_Neg(m_Intrinsic(m_Specific(X))))) + return FalseVal; + if (match(TrueVal, m_Neg(m_Intrinsic(m_Value(X)))) && + match(FalseVal, m_Intrinsic(m_Specific(X)))) + return FalseVal; } // Check for other compares that behave like bit test. 
diff --git a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll index e9305a927c42c..70b50da9f0415 100644 --- a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll +++ b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll @@ -205,10 +205,7 @@ define i1 @abs_ule_int_min(i8 %x) { define i32 @select_abs_of_abs_eq(i32 %x) { ; CHECK-LABEL: @select_abs_of_abs_eq( ; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) -; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[ABS]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[NEG]], i32 [[ABS]] -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: ret i32 [[ABS]] ; %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) %neg = sub i32 0, %abs @@ -220,10 +217,7 @@ define i32 @select_abs_of_abs_eq(i32 %x) { define i32 @select_abs_of_abs_ne(i32 %x) { ; CHECK-LABEL: @select_abs_of_abs_ne( ; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) -; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[ABS]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X]], 0 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[ABS]], i32 [[NEG]] -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: ret i32 [[ABS]] ; %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) %neg = sub i32 0, %abs @@ -236,9 +230,7 @@ define i32 @select_nabs_of_abs_eq(i32 %x) { ; CHECK-LABEL: @select_nabs_of_abs_eq( ; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) ; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[ABS]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[ABS]], i32 [[NEG]] -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: ret i32 [[NEG]] ; %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) %neg = sub i32 0, %abs @@ -251,9 +243,7 @@ define i32 @select_nabs_of_abs_ne(i32 %x) { ; CHECK-LABEL: @select_nabs_of_abs_ne( ; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false) ; CHECK-NEXT: 
[[NEG:%.*]] = sub i32 0, [[ABS]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X]], 0 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[NEG]], i32 [[ABS]] -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: ret i32 [[NEG]] ; %abs = call i32 @llvm.abs.i32(i32 %x, i1 false) %neg = sub i32 0, %abs From 4d0312c8e05be5353c6c29b31036647dceca3ce5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 6 Sep 2020 13:02:11 +0200 Subject: [PATCH 280/465] Add proper move ctor/move assign to APValue. NFCI. Swapping 64 bytes to make a move isn't cheap. --- clang/include/clang/AST/APValue.h | 11 ++++------- clang/lib/AST/APValue.cpp | 24 ++++++++++++++++++++---- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/AST/APValue.h b/clang/include/clang/AST/APValue.h index 87e4bd7f84c11..5103cfa8604e5 100644 --- a/clang/include/clang/AST/APValue.h +++ b/clang/include/clang/AST/APValue.h @@ -304,7 +304,7 @@ class APValue { MakeComplexFloat(); setComplexFloat(std::move(R), std::move(I)); } APValue(const APValue &RHS); - APValue(APValue &&RHS) : Kind(None) { swap(RHS); } + APValue(APValue &&RHS); APValue(LValueBase B, const CharUnits &O, NoLValuePath N, bool IsNullPtr = false) : Kind(None) { @@ -339,6 +339,9 @@ class APValue { return Result; } + APValue &operator=(const APValue &RHS); + APValue &operator=(APValue &&RHS); + ~APValue() { if (Kind != None && Kind != Indeterminate) DestroyDataAndMakeUninit(); @@ -591,12 +594,6 @@ class APValue { ((AddrLabelDiffData*)(char*)Data.buffer)->RHSExpr = RHSExpr; } - /// Assign by swapping from a copy of the RHS. 
- APValue &operator=(APValue RHS) { - swap(RHS); - return *this; - } - private: void DestroyDataAndMakeUninit(); void MakeInt() { diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 2a8834b4db0cb..08ae0ff3c67d3 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -304,6 +304,25 @@ APValue::APValue(const APValue &RHS) : Kind(None) { } } +APValue::APValue(APValue &&RHS) : Kind(RHS.Kind), Data(RHS.Data) { + RHS.Kind = None; +} + +APValue &APValue::operator=(const APValue &RHS) { + if (this != &RHS) + *this = APValue(RHS); + return *this; +} + +APValue &APValue::operator=(APValue &&RHS) { + if (Kind != None && Kind != Indeterminate) + DestroyDataAndMakeUninit(); + Kind = RHS.Kind; + Data = RHS.Data; + RHS.Kind = None; + return *this; +} + void APValue::DestroyDataAndMakeUninit() { if (Kind == Int) ((APSInt*)(char*)Data.buffer)->~APSInt(); @@ -372,10 +391,7 @@ bool APValue::needsCleanup() const { void APValue::swap(APValue &RHS) { std::swap(Kind, RHS.Kind); - char TmpData[DataSize]; - memcpy(TmpData, Data.buffer, DataSize); - memcpy(Data.buffer, RHS.Data.buffer, DataSize); - memcpy(RHS.Data.buffer, TmpData, DataSize); + std::swap(Data, RHS.Data); } static double GetApproxValue(const llvm::APFloat &F) { From d866dc374986ac1cff6b4950ea5fa3f8687fdadd Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 6 Sep 2020 12:51:43 +0100 Subject: [PATCH 281/465] [ARM] Regenerate tests. 
NFC --- .../CodeGen/Thumb2/mve-gather-scatter-tailpred.ll | 13 ++++++++++--- llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll | 11 ++++++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll index 6204c06303432..829aabf4b35ca 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll @@ -14,7 +14,7 @@ define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32* ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vadd.i32 q0, q0, r1 ; CHECK-NEXT: adds r1, r3, #4 -; CHECK: .LBB0_1: @ %vector.body +; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vmov q2, q1 @@ -30,6 +30,13 @@ define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32* ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: str.w r0, [r2, r1, lsl #2] ; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 4294967228 @ 0xffffffbc +; CHECK-NEXT: .long 4294967248 @ 0xffffffd0 +; CHECK-NEXT: .long 4294967268 @ 0xffffffe4 +; CHECK-NEXT: .long 4294967288 @ 0xfffffff8 entry: ; preds = %middle. 
%add.us.us = add i32 4, %n %arrayidx.us.us = getelementptr inbounds i32, i32* %C, i32 %add.us.us @@ -79,7 +86,7 @@ define dso_local void @mve_gatherscatter_offset(i32* noalias nocapture readonly ; CHECK-NEXT: vmov.i32 q2, #0x0 ; CHECK-NEXT: vmov.i32 q0, #0x14 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB1_1: @ %vector.body +; CHECK-NEXT: .LBB1_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vmov q3, q2 @@ -154,7 +161,7 @@ define dso_local void @mve_scatter_qi(i32* noalias nocapture readonly %A, i32* n ; CHECK-NEXT: vadd.i32 q0, q0, r1 ; CHECK-NEXT: adds r1, r3, #4 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB2_1: @ %vector.body +; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vmov q3, q1 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll index 5669fdf38fee0..ed7e84a899d24 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -24,7 +24,7 @@ define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture % ; CHECK-NEXT: vmov.i32 q3, #0x4 ; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: dls lr, lr -; CHECK: .LBB0_1: @ %do.body +; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: sub.w r12, r12, #4 @@ -48,6 +48,15 @@ define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture % ; CHECK-NEXT: vstr s8, [r2] ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r6, r7, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long 0x5368d4a5 @ float 
9.99999995E+11 entry: %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 1) %1 = extractvalue { <4 x i32>, i32 } %0, 0 From 667e800bb3a8c1bdda0cabad7549c766b3424064 Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 6 Sep 2020 13:19:55 +0100 Subject: [PATCH 282/465] [ARM] Remove -O3 from mve intrinsic tests. NFC --- .../test/CodeGen/arm-mve-intrinsics/vmaxaq.c | 28 ++++++------- .../CodeGen/arm-mve-intrinsics/vmaxnmaq.c | 24 +++++------ .../test/CodeGen/arm-mve-intrinsics/vmaxnmq.c | 24 +++++------ clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c | 40 +++++++++---------- .../test/CodeGen/arm-mve-intrinsics/vminaq.c | 28 ++++++------- .../CodeGen/arm-mve-intrinsics/vminnmaq.c | 24 +++++------ .../test/CodeGen/arm-mve-intrinsics/vminnmq.c | 24 +++++------ clang/test/CodeGen/arm-mve-intrinsics/vminq.c | 40 +++++++++---------- 8 files changed, 116 insertions(+), 116 deletions(-) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c index 03ab37474ba02..a656657b66197 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp 
-mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include @@ -9,8 +9,8 @@ // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <16 x i8> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> [[TMP2]], <16 x i8> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp uge <16 x i8> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> [[A]], <16 x i8> [[TMP2]] // CHECK-NEXT: ret <16 x i8> [[TMP4]] // uint8x16_t test_vmaxaq_s8(uint8x16_t a, int8x16_t b) @@ -27,8 +27,8 @@ uint8x16_t test_vmaxaq_s8(uint8x16_t a, int8x16_t b) // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <8 x i16> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <8 x i16> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP2]], <8 x i16> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[A]], <8 x i16> [[TMP2]] // CHECK-NEXT: ret <8 x i16> [[TMP4]] // uint16x8_t test_vmaxaq_s16(uint16x8_t a, int16x8_t b) @@ -45,8 +45,8 @@ uint16x8_t test_vmaxaq_s16(uint16x8_t a, int16x8_t b) // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> 
[[TMP2]], <4 x i32> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[A]], <4 x i32> [[TMP2]] // CHECK-NEXT: ret <4 x i32> [[TMP4]] // uint32x4_t test_vmaxaq_s32(uint32x4_t a, int32x4_t b) @@ -61,8 +61,8 @@ uint32x4_t test_vmaxaq_s32(uint32x4_t a, int32x4_t b) // CHECK-LABEL: @test_vmaxaq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.vmaxa.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmaxa.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vmaxaq_m_s8(uint8x16_t a, int8x16_t b, mve_pred16_t p) @@ -77,8 +77,8 @@ uint8x16_t test_vmaxaq_m_s8(uint8x16_t a, int8x16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxaq_m_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.vmaxa.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmaxa.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vmaxaq_m_s16(uint16x8_t a, int16x8_t b, mve_pred16_t p) @@ -93,8 +93,8 @@ uint16x8_t test_vmaxaq_m_s16(uint16x8_t a, int16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxaq_m_s32( // 
CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.vmaxa.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmaxa.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmaxaq_m_s32(uint32x4_t a, int32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c index 20c22056d52a5..52b439fe5555f 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c @@ -1,14 +1,14 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vmaxnmaq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) @@ -22,9 +22,9 @@ float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) // CHECK-LABEL: @test_vmaxnmaq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) @@ -39,8 +39,8 @@ float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) // CHECK-LABEL: @test_vmaxnmaq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -55,8 +55,8 @@ float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxnmaq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c index 5caf8d6421feb..19b5d28a52440 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple 
thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vmaxnmq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) @@ -20,7 +20,7 @@ float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) // CHECK-LABEL: @test_vmaxnmq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) @@ -35,8 +35,8 @@ float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) // CHECK-LABEL: @test_vmaxnmq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> 
[[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -51,8 +51,8 @@ float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t // CHECK-LABEL: @test_vmaxnmq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) @@ -67,8 +67,8 @@ float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t // CHECK-LABEL: @test_vmaxnmq_x_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -83,8 +83,8 @@ float16x8_t 
test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxnmq_x_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c index d0ddc7a99e9f8..7fb2f5191f440 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c @@ -1,13 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone 
-DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vmaxq_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[A]], <16 x i8> [[B]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] // int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) @@ -21,8 +21,8 @@ int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) // CHECK-LABEL: @test_vmaxq_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp ult <8 x i16> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[A]], <8 x i16> [[B]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] // uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) @@ -36,8 +36,8 @@ uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) // CHECK-LABEL: @test_vmaxq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] // int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) @@ -52,8 +52,8 @@ int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) // CHECK-LABEL: @test_vmaxq_m_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 
x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vmaxq_m_u8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) @@ -68,8 +68,8 @@ uint8x16_t test_vmaxq_m_u8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_ // CHECK-LABEL: @test_vmaxq_m_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vmaxq_m_s16(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) @@ -84,8 +84,8 @@ int16x8_t test_vmaxq_m_s16(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pre // CHECK-LABEL: @test_vmaxq_m_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = 
call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmaxq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) @@ -100,8 +100,8 @@ uint32x4_t test_vmaxq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve // CHECK-LABEL: @test_vmaxq_x_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vmaxq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) @@ -116,8 +116,8 @@ uint8x16_t test_vmaxq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxq_x_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vmaxq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) @@ -132,8 
+132,8 @@ uint16x8_t test_vmaxq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vmaxq_x_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vmaxq_x_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c index b23dc1a27be86..6a6279cce0df2 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminaq.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone 
-DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include @@ -9,8 +9,8 @@ // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ult <16 x i8> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> [[TMP2]], <16 x i8> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp ule <16 x i8> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> [[A]], <16 x i8> [[TMP2]] // CHECK-NEXT: ret <16 x i8> [[TMP4]] // uint8x16_t test_vminaq_s8(uint8x16_t a, int8x16_t b) @@ -27,8 +27,8 @@ uint8x16_t test_vminaq_s8(uint8x16_t a, int8x16_t b) // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <8 x i16> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ult <8 x i16> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP2]], <8 x i16> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp ule <8 x i16> [[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[A]], <8 x i16> [[TMP2]] // CHECK-NEXT: ret <8 x i16> [[TMP4]] // uint16x8_t test_vminaq_s16(uint16x8_t a, int16x8_t b) @@ -45,8 +45,8 @@ uint16x8_t test_vminaq_s16(uint16x8_t a, int16x8_t b) // CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[B]] // CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[B]] -// CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[TMP2]], [[A:%.*]] -// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP2]], <4 x i32> [[A]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> 
[[A:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[A]], <4 x i32> [[TMP2]] // CHECK-NEXT: ret <4 x i32> [[TMP4]] // uint32x4_t test_vminaq_s32(uint32x4_t a, int32x4_t b) @@ -61,8 +61,8 @@ uint32x4_t test_vminaq_s32(uint32x4_t a, int32x4_t b) // CHECK-LABEL: @test_vminaq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.vmina.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmina.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vminaq_m_s8(uint8x16_t a, int8x16_t b, mve_pred16_t p) @@ -77,8 +77,8 @@ uint8x16_t test_vminaq_m_s8(uint8x16_t a, int8x16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminaq_m_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.vmina.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmina.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vminaq_m_s16(uint16x8_t a, int16x8_t b, mve_pred16_t p) @@ -93,8 +93,8 @@ uint16x8_t test_vminaq_m_s16(uint16x8_t a, int16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminaq_m_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.vmina.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmina.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vminaq_m_s32(uint32x4_t a, int32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c index b13d851aec79b..5ddc3914f1857 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c @@ -1,14 +1,14 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vminnmaq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) @@ -22,9 +22,9 @@ float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) // CHECK-LABEL: @test_vminnmaq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) @@ -39,8 +39,8 @@ float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) // CHECK-LABEL: @test_vminnmaq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -55,8 +55,8 @@ float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminnmaq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c index 7ebcf45d88330..0723dfae2f064 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vminnmq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) @@ -20,7 +20,7 @@ float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) // CHECK-LABEL: @test_vminnmq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) @@ -35,8 +35,8 @@ float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) // CHECK-LABEL: @test_vminnmq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmq_m_f16(float16x8_t 
inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -51,8 +51,8 @@ float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t // CHECK-LABEL: @test_vminnmq_m_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) @@ -67,8 +67,8 @@ float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t // CHECK-LABEL: @test_vminnmq_x_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) // CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) @@ -83,8 +83,8 @@ float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminnmq_x_f32( // CHECK-NEXT: entry: // 
CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) // CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c index d4186858b121a..1f3b0d670ee17 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c @@ -1,13 +1,13 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include // CHECK-LABEL: @test_vminq_u8( // CHECK-NEXT: 
entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[A]], <16 x i8> [[B]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] // uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) @@ -21,8 +21,8 @@ uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) // CHECK-LABEL: @test_vminq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp sle <8 x i16> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[A]], <8 x i16> [[B]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] // int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) @@ -36,8 +36,8 @@ int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) // CHECK-LABEL: @test_vminq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] // uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) @@ -52,8 +52,8 @@ uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) // CHECK-LABEL: @test_vminq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: 
[[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vminq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) @@ -68,8 +68,8 @@ int8x16_t test_vminq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred // CHECK-LABEL: @test_vminq_m_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vminq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) @@ -84,8 +84,8 @@ uint16x8_t test_vminq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve // CHECK-LABEL: @test_vminq_m_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x 
i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vminq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) @@ -100,8 +100,8 @@ int32x4_t test_vminq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pre // CHECK-LABEL: @test_vminq_x_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) // CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vminq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) @@ -116,8 +116,8 @@ uint8x16_t test_vminq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminq_x_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) // CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vminq_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) @@ -132,8 +132,8 @@ int16x8_t test_vminq_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) // CHECK-LABEL: @test_vminq_x_s32( // 
CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vminq_x_s32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) From ecac5c28089283fbaef1fec758535ca700095a09 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Sep 2020 14:52:06 +0100 Subject: [PATCH 283/465] [X86][AVX] lowerShuffleWithPERMV - adjust binary shuffle masks to account for widening on non-VLX targets rGabd33bf5eff2 enabled us to pad 128/256-bit shuffles to 512-bit on non-VLX targets, but wasn't updating binary shuffles to account for the new vector width. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 17 +++++++++++++--- .../X86/shuffle-strided-with-offset-512.ll | 20 +++++++++++-------- llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll | 18 +++++++++-------- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1212585b4baf5..99d35f0c91ffa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -14951,16 +14951,27 @@ static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + int NumElts = VT.getVectorNumElements(); MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); - MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); - SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true); + MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, NumElts); + SDValue MaskNode; MVT ShuffleVT = VT; if (!VT.is512BitVector() && !Subtarget.hasVLX()) { V1 = widenSubVector(V1, false, Subtarget, DAG, DL, 512); V2 = widenSubVector(V2, false, Subtarget, DAG, DL, 512); - MaskNode = widenSubVector(MaskNode, false, Subtarget, DAG, DL, 512); ShuffleVT = V1.getSimpleValueType(); + + // Adjust mask to correct indices for the second input. 
+ unsigned Scale = 512 / VT.getSizeInBits(); + SmallVector AdjustedMask(Mask.begin(), Mask.end()); + for (int &M : AdjustedMask) + if (NumElts <= M) + M += (Scale - 1) * NumElts; + MaskNode = getConstVector(AdjustedMask, MaskVecVT, DAG, DL, true); + MaskNode = widenSubVector(MaskNode, false, Subtarget, DAG, DL, 512); + } else { + MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true); } SDValue Result; diff --git a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll index 40cd2fcd4fdeb..e0994e5b58faf 100644 --- a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll @@ -85,9 +85,10 @@ define void @shuffle_v32i16_to_v16i16_1(<32 x i16>* %L, <16 x i16>* %S) nounwind ; ; AVX512BW-LABEL: shuffle_v32i16_to_v16i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,17,19,21,23,9,11,13,15,25,27,29,31] +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,33,35,37,39,9,11,13,15,41,43,45,47] ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm1[0,2,1,3] ; AVX512BW-NEXT: vmovdqa %ymm0, (%rsi) ; AVX512BW-NEXT: vzeroupper @@ -258,9 +259,10 @@ define void @shuffle_v32i16_to_v8i16_1(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <1,5,9,13,17,21,25,29,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <1,5,9,13,33,37,41,45,u,u,u,u,u,u,u,u> ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -316,9 +318,10 @@ define void 
@shuffle_v32i16_to_v8i16_2(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <2,6,10,14,18,22,26,30,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <2,6,10,14,34,38,42,46,u,u,u,u,u,u,u,u> ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -374,9 +377,10 @@ define void @shuffle_v32i16_to_v8i16_3(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_3: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <3,7,11,15,19,23,27,31,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <3,7,11,15,35,39,43,47,u,u,u,u,u,u,u,u> ; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll index e4be8f5a273be..e6821daa97ca3 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll @@ -327,8 +327,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512VBMI-NEXT: vpermt2b %zmm0, %zmm1, %zmm0 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61] +; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 ; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed 
$zmm0 ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq @@ -412,8 +412,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -; AVX512VBMI-NEXT: vpermt2b %zmm0, %zmm1, %zmm0 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62] +; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 ; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq @@ -455,9 +455,10 @@ define <4 x double> @PR34175(<32 x i16>* %p) { ; ; AVX512BW-LABEL: PR34175: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,32,40,u,u,u,u,u,u,u,u,u,u,u,u> ; AVX512BW-NEXT: vmovdqu (%rdi), %ymm1 -; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqu 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX512BW-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512BW-NEXT: retq @@ -473,9 +474,10 @@ define <4 x double> @PR34175(<32 x i16>* %p) { ; ; AVX512VBMI-LABEL: PR34175: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,32,40,u,u,u,u,u,u,u,u,u,u,u,u> ; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1 -; AVX512VBMI-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %ymm2 +; AVX512VBMI-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 ; AVX512VBMI-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX512VBMI-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512VBMI-NEXT: retq From 8c386c94749a78392fd763f8449ca3e55f030ffd Mon Sep 17 
00:00:00 2001 From: Benjamin Kramer Date: Sun, 6 Sep 2020 18:05:24 +0200 Subject: [PATCH 284/465] [SmallVector] Move error handling out of line This reduces duplication and avoids emitting ice cold code into every instance of grow(). --- llvm/include/llvm/ADT/SmallVector.h | 36 +++++++------------- llvm/lib/Support/SmallVector.cpp | 51 ++++++++++++++++++----------- 2 files changed, 43 insertions(+), 44 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 8197b8688a529..c3c6a366dab2d 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -32,9 +32,6 @@ #include #include #include -#ifdef LLVM_ENABLE_EXCEPTIONS -#include -#endif namespace llvm { @@ -65,6 +62,13 @@ template class SmallVectorBase { /// This function will report a fatal error if it cannot increase capacity. void grow_pod(void *FirstEl, size_t MinSize, size_t TSize); + /// Report that MinSize doesn't fit into this vector's size type. Throws + /// std::length_error or calls report_fatal_error. + LLVM_ATTRIBUTE_NORETURN static void report_size_overflow(size_t MinSize); + /// Report that this vector is already at maximum capacity. Throws + /// std::length_error or calls report_fatal_error. + LLVM_ATTRIBUTE_NORETURN static void report_at_maximum_capacity(); + public: size_t size() const { return Size; } size_t capacity() const { return Capacity; } @@ -271,32 +275,16 @@ template void SmallVectorTemplateBase::grow(size_t MinSize) { // Ensure we can fit the new capacity. // This is only going to be applicable when the capacity is 32 bit. - if (MinSize > this->SizeTypeMax()) { - std::string Reason = "SmallVector unable to grow. 
Requested capacity (" + - std::to_string(MinSize) + - ") is larger than maximum value for size type (" + - std::to_string(this->SizeTypeMax()) + ")"; -#ifdef LLVM_ENABLE_EXCEPTIONS - throw std::length_error(Reason); -#else - report_fatal_error(Reason); -#endif - } + if (MinSize > this->SizeTypeMax()) + this->report_size_overflow(MinSize); // Ensure we can meet the guarantee of space for at least one more element. // The above check alone will not catch the case where grow is called with a // default MinSize of 0, but the current capacity cannot be increased. // This is only going to be applicable when the capacity is 32 bit. - if (this->capacity() == this->SizeTypeMax()) { - std::string Reason = - "SmallVector capacity unable to grow. Already at maximum size " + - std::to_string(this->SizeTypeMax()); -#ifdef LLVM_ENABLE_EXCEPTIONS - throw std::length_error(Reason); -#else - report_fatal_error(Reason); -#endif - } + if (this->capacity() == this->SizeTypeMax()) + this->report_at_maximum_capacity(); + // Always grow, even from zero. size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2)); NewCapacity = std::min(std::max(NewCapacity, MinSize), this->SizeTypeMax()); diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp index 73137640536c3..debde5cdad5b6 100644 --- a/llvm/lib/Support/SmallVector.cpp +++ b/llvm/lib/Support/SmallVector.cpp @@ -12,6 +12,9 @@ #include "llvm/ADT/SmallVector.h" #include +#ifdef LLVM_ENABLE_EXCEPTIONS +#include +#endif using namespace llvm; // Check that no bytes are wasted and everything is well-aligned. @@ -42,37 +45,45 @@ static_assert(sizeof(SmallVector) == sizeof(void *) * 2 + sizeof(void *), "1 byte elements have word-sized type for size and capacity"); +template +void SmallVectorBase::report_size_overflow(size_t MinSize) { + std::string Reason = "SmallVector unable to grow. 
Requested capacity (" + + std::to_string(MinSize) + + ") is larger than maximum value for size type (" + + std::to_string(SizeTypeMax()) + ")"; +#ifdef LLVM_ENABLE_EXCEPTIONS + throw std::length_error(Reason); +#else + report_fatal_error(Reason); +#endif +} + +template void SmallVectorBase::report_at_maximum_capacity() { + std::string Reason = + "SmallVector capacity unable to grow. Already at maximum size " + + std::to_string(SizeTypeMax()); +#ifdef LLVM_ENABLE_EXCEPTIONS + throw std::length_error(Reason); +#else + report_fatal_error(Reason); +#endif +} + // Note: Moving this function into the header may cause performance regression. template void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSize, size_t TSize) { // Ensure we can fit the new capacity. // This is only going to be applicable when the capacity is 32 bit. - if (MinSize > SizeTypeMax()) { - std::string Reason = "SmallVector unable to grow. Requested capacity (" + - std::to_string(MinSize) + - ") is larger than maximum value for size type (" + - std::to_string(SizeTypeMax()) + ")"; -#ifdef LLVM_ENABLE_EXCEPTIONS - throw std::length_error(Reason); -#else - report_fatal_error(Reason); -#endif - } + if (MinSize > SizeTypeMax()) + report_size_overflow(MinSize); // Ensure we can meet the guarantee of space for at least one more element. // The above check alone will not catch the case where grow is called with a // default MinSize of 0, but the current capacity cannot be increased. // This is only going to be applicable when the capacity is 32 bit. - if (capacity() == SizeTypeMax()) { - std::string Reason = - "SmallVector capacity unable to grow. 
Already at maximum size " + - std::to_string(SizeTypeMax()); -#ifdef LLVM_ENABLE_EXCEPTIONS - throw std::length_error(Reason); -#endif - report_fatal_error(Reason); - } + if (capacity() == SizeTypeMax()) + report_at_maximum_capacity(); // In theory 2*capacity can overflow if the capacity is 64 bit, but the // original capacity would never be large enough for this to be a problem. From cc6713a2c35edf17cfb567284cc76b374308e5e4 Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Sun, 6 Sep 2020 20:34:32 +0200 Subject: [PATCH 285/465] Thread safety analysis: Test and document release_generic_capability The old locking attributes had a generic release, but as it turns out the capability-based attributes have it as well. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D87064 --- clang/docs/ThreadSafetyAnalysis.rst | 26 ++++++++++++------- .../test/SemaCXX/thread-safety-annotations.h | 3 ++- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/clang/docs/ThreadSafetyAnalysis.rst b/clang/docs/ThreadSafetyAnalysis.rst index b8d7d24275b92..e6ce0e04e70ab 100644 --- a/clang/docs/ThreadSafetyAnalysis.rst +++ b/clang/docs/ThreadSafetyAnalysis.rst @@ -209,21 +209,21 @@ must be held on entry to the function, *and must still be held on exit*. } -ACQUIRE(...), ACQUIRE_SHARED(...), RELEASE(...), RELEASE_SHARED(...) --------------------------------------------------------------------- +ACQUIRE(...), ACQUIRE_SHARED(...), RELEASE(...), RELEASE_SHARED(...), RELEASE_GENERIC(...) +------------------------------------------------------------------------------------------ *Previously*: ``EXCLUSIVE_LOCK_FUNCTION``, ``SHARED_LOCK_FUNCTION``, ``UNLOCK_FUNCTION`` -``ACQUIRE`` is an attribute on functions or methods, which -declares that the function acquires a capability, but does not release it. The -caller must not hold the given capability on entry, and it will hold the -capability on exit. ``ACQUIRE_SHARED`` is similar. 
+``ACQUIRE`` and ``ACQUIRE_SHARED`` are attributes on functions or methods +declaring that the function acquires a capability, but does not release it. +The given capability must not be held on entry, and will be held on exit +(exclusively for ``ACQUIRE``, shared for ``ACQUIRE_SHARED``). -``RELEASE`` and ``RELEASE_SHARED`` declare that the function releases the given -capability. The caller must hold the capability on entry, and will no longer -hold it on exit. It does not matter whether the given capability is shared or -exclusive. +``RELEASE``, ``RELEASE_SHARED``, and ``RELEASE_GENERIC`` declare that the +function releases the given capability. The capability must be held on entry +(exclusively for ``RELEASE``, shared for ``RELEASE_SHARED``, exclusively or +shared for ``RELEASE_GENERIC``), and will no longer be held on exit. .. code-block:: c++ @@ -820,6 +820,9 @@ implementation. #define RELEASE_SHARED(...) \ THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__)) + #define RELEASE_GENERIC(...) \ + THREAD_ANNOTATION_ATTRIBUTE__(release_generic_capability(__VA_ARGS__)) + #define TRY_ACQUIRE(...) \ THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__)) @@ -864,6 +867,9 @@ implementation. // Release/unlock a shared mutex. void ReaderUnlock() RELEASE_SHARED(); + // Generic unlock, can unlock exclusive and shared mutexes. + void GenericUnlock() RELEASE_GENERIC(); + // Try to acquire the mutex. Returns true on success, and false on failure. bool TryLock() TRY_ACQUIRE(true); diff --git a/clang/test/SemaCXX/thread-safety-annotations.h b/clang/test/SemaCXX/thread-safety-annotations.h index 7755a1b328e7e..d89bcf8ff4706 100644 --- a/clang/test/SemaCXX/thread-safety-annotations.h +++ b/clang/test/SemaCXX/thread-safety-annotations.h @@ -6,6 +6,7 @@ #define ASSERT_SHARED_LOCK(...) __attribute__((assert_shared_capability(__VA_ARGS__))) #define EXCLUSIVE_LOCK_FUNCTION(...) 
__attribute__((acquire_capability(__VA_ARGS__))) #define SHARED_LOCK_FUNCTION(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) +#define UNLOCK_FUNCTION(...) __attribute__((release_generic_capability(__VA_ARGS__))) #define EXCLUSIVE_TRYLOCK_FUNCTION(...) __attribute__((try_acquire_capability(__VA_ARGS__))) #define SHARED_TRYLOCK_FUNCTION(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) #define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((requires_capability(__VA_ARGS__))) @@ -16,6 +17,7 @@ #define ASSERT_SHARED_LOCK(...) __attribute__((assert_shared_lock(__VA_ARGS__))) #define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__))) #define SHARED_LOCK_FUNCTION(...) __attribute__((shared_lock_function(__VA_ARGS__))) +#define UNLOCK_FUNCTION(...) __attribute__((unlock_function(__VA_ARGS__))) #define EXCLUSIVE_TRYLOCK_FUNCTION(...) __attribute__((exclusive_trylock_function(__VA_ARGS__))) #define SHARED_TRYLOCK_FUNCTION(...) __attribute__((shared_trylock_function(__VA_ARGS__))) #define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__))) @@ -23,7 +25,6 @@ #endif // Lock semantics only -#define UNLOCK_FUNCTION(...) __attribute__((unlock_function(__VA_ARGS__))) #define GUARDED_VAR __attribute__((guarded_var)) #define PT_GUARDED_VAR __attribute__((pt_guarded_var)) From bbb3baf6205c54231257f64fd18661a13a5c97ee Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Sun, 6 Sep 2020 20:35:02 +0200 Subject: [PATCH 286/465] Thread safety analysis: Improve documentation for scoped capabilities They are for more powerful than the current documentation implies, this adds * adopting a lock, * deferring a lock, * manually unlocking the scoped capability, * relocking the scoped capability, possibly in a different mode, * try-relocking the scoped capability. Also there is now a generic explanation how attributes on scoped capabilities work. 
There has been confusion in the past about how to annotate them (see e.g. PR33504), hopefully this clears things up. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D87066 --- clang/docs/ThreadSafetyAnalysis.rst | 68 ++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/clang/docs/ThreadSafetyAnalysis.rst b/clang/docs/ThreadSafetyAnalysis.rst index e6ce0e04e70ab..e4a3342c02bd8 100644 --- a/clang/docs/ThreadSafetyAnalysis.rst +++ b/clang/docs/ThreadSafetyAnalysis.rst @@ -402,6 +402,13 @@ the destructor. Such classes require special handling because the constructor and destructor refer to the capability via different names; see the ``MutexLocker`` class in :ref:`mutexheader`, below. +Scoped capabilities are treated as capabilities that are implicitly acquired +on construction and released on destruction. They are associated with +the set of (regular) capabilities named in thread safety attributes on the +constructor. Acquire-type attributes on other member functions are treated as +applying to that set of associated capabilities, while ``RELEASE`` implies that +a function releases all associated capabilities in whatever mode they're held. + TRY_ACQUIRE(, ...), TRY_ACQUIRE_SHARED(, ...) --------------------------------------------------------- @@ -886,19 +893,78 @@ implementation. const Mutex& operator!() const { return *this; } }; + // Tag types for selecting a constructor. + struct adopt_lock_t {} inline constexpr adopt_lock = {}; + struct defer_lock_t {} inline constexpr defer_lock = {}; + struct shared_lock_t {} inline constexpr shared_lock = {}; // MutexLocker is an RAII class that acquires a mutex in its constructor, and // releases it in its destructor. class SCOPED_CAPABILITY MutexLocker { private: Mutex* mut; + bool locked; public: - MutexLocker(Mutex *mu) ACQUIRE(mu) : mut(mu) { + // Acquire mu, implicitly acquire *this and associate it with mu. 
+ MutexLocker(Mutex *mu) ACQUIRE(mu) : mut(mu), locked(true) { mu->Lock(); } + + // Assume mu is held, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu, adopt_lock_t) REQUIRES(mu) : mut(mu), locked(true) {} + + // Acquire mu in shared mode, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu, shared_lock_t) ACQUIRE_SHARED(mu) : mut(mu), locked(true) { + mu->ReaderLock(); + } + + // Assume mu is held in shared mode, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu, adopt_lock_t, shared_lock_t) REQUIRES_SHARED(mu) + : mut(mu), locked(true) {} + + // Assume mu is not held, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu, defer_lock_t) EXCLUDES(mu) : mut(mu), locked(false) {} + + // Release *this and all associated mutexes, if they are still held. + // There is no warning if the scope was already unlocked before. ~MutexLocker() RELEASE() { + if (locked) + mut->GenericUnlock(); + } + + // Acquire all associated mutexes exclusively. + void Lock() ACQUIRE() { + mut->Lock(); + locked = true; + } + + // Try to acquire all associated mutexes exclusively. + bool TryLock() TRY_ACQUIRE(true) { + return locked = mut->TryLock(); + } + + // Acquire all associated mutexes in shared mode. + void ReaderLock() ACQUIRE_SHARED() { + mut->ReaderLock(); + locked = true; + } + + // Try to acquire all associated mutexes in shared mode. + bool ReaderTryLock() TRY_ACQUIRE_SHARED(true) { + return locked = mut->ReaderTryLock(); + } + + // Release all associated mutexes. Warn on double unlock. + void Unlock() RELEASE() { mut->Unlock(); + locked = false; + } + + // Release all associated mutexes. Warn on double unlock. 
+ void ReaderUnlock() RELEASE() { + mut->ReaderUnlock(); + locked = false; } }; From 54d432aa6b835ee7e835d0626c15ca5e7eb83ab4 Mon Sep 17 00:00:00 2001 From: zhanghb97 Date: Sun, 6 Sep 2020 11:37:16 -0700 Subject: [PATCH 287/465] [mlir] Add Shaped Type, Tensor Type and MemRef Type to python bindings. Based on the PyType and PyConcreteType classes, this patch implements the bindings of Shaped Type, Tensor Type and MemRef Type subclasses. The Tensor Type and MemRef Type are bound as ranked and unranked separately. This patch adds the ***GetChecked C API to make sure the python side can get a valid type or a nullptr. Shaped type is not a kind of standard types, it is the base class for vectors, memrefs and tensors, this patch binds the PyShapedType class as the base class of Vector Type, Tensor Type and MemRef Type subclasses. Reviewed By: stellaraccident Differential Revision: https://reviews.llvm.org/D87091 --- mlir/include/mlir-c/StandardTypes.h | 28 +++ mlir/lib/Bindings/Python/IRModules.cpp | 280 +++++++++++++++++++++++-- mlir/lib/CAPI/IR/StandardTypes.cpp | 36 ++++ mlir/test/Bindings/Python/ir_types.py | 170 ++++++++++++++- 4 files changed, 492 insertions(+), 22 deletions(-) diff --git a/mlir/include/mlir-c/StandardTypes.h b/mlir/include/mlir-c/StandardTypes.h index ad28ea5467171..eacfe0d39b6aa 100644 --- a/mlir/include/mlir-c/StandardTypes.h +++ b/mlir/include/mlir-c/StandardTypes.h @@ -162,6 +162,11 @@ int mlirTypeIsAVector(MlirType type); * is owned by the context. */ MlirType mlirVectorTypeGet(intptr_t rank, int64_t *shape, MlirType elementType); +/** Same as "mlirVectorTypeGet" but returns a nullptr wrapping MlirType on + * illegal arguments, emitting appropriate diagnostics. */ +MlirType mlirVectorTypeGetChecked(intptr_t rank, int64_t *shape, + MlirType elementType, MlirLocation loc); + /*============================================================================*/ /* Ranked / Unranked Tensor type. 
*/ /*============================================================================*/ @@ -180,10 +185,20 @@ int mlirTypeIsAUnrankedTensor(MlirType type); MlirType mlirRankedTensorTypeGet(intptr_t rank, int64_t *shape, MlirType elementType); +/** Same as "mlirRankedTensorTypeGet" but returns a nullptr wrapping MlirType on + * illegal arguments, emitting appropriate diagnostics. */ +MlirType mlirRankedTensorTypeGetChecked(intptr_t rank, int64_t *shape, + MlirType elementType, MlirLocation loc); + /** Creates an unranked tensor type with the given element type in the same * context as the element type. The type is owned by the context. */ MlirType mlirUnrankedTensorTypeGet(MlirType elementType); +/** Same as "mlirUnrankedTensorTypeGet" but returns a nullptr wrapping MlirType + * on illegal arguments, emitting appropriate diagnostics. */ +MlirType mlirUnrankedTensorTypeGetChecked(MlirType elementType, + MlirLocation loc); + /*============================================================================*/ /* Ranked / Unranked MemRef type. */ /*============================================================================*/ @@ -208,10 +223,23 @@ MlirType mlirMemRefTypeGet(MlirType elementType, intptr_t rank, int64_t *shape, MlirType mlirMemRefTypeContiguousGet(MlirType elementType, intptr_t rank, int64_t *shape, unsigned memorySpace); +/** Same as "mlirMemRefTypeContiguousGet" but returns a nullptr wrapping + * MlirType on illegal arguments, emitting appropriate diagnostics. */ +MlirType mlirMemRefTypeContiguousGetChecked(MlirType elementType, intptr_t rank, + int64_t *shape, + unsigned memorySpace, + MlirLocation loc); + /** Creates an Unranked MemRef type with the given element type and in the given * memory space. The type is owned by the context of element type. 
*/ MlirType mlirUnrankedMemRefTypeGet(MlirType elementType, unsigned memorySpace); +/** Same as "mlirUnrankedMemRefTypeGet" but returns a nullptr wrapping + * MlirType on illegal arguments, emitting appropriate diagnostics. */ +MlirType mlirUnrankedMemRefTypeGetChecked(MlirType elementType, + unsigned memorySpace, + MlirLocation loc); + /** Returns the number of affine layout maps in the given MemRef type. */ intptr_t mlirMemRefTypeGetNumAffineMaps(MlirType type); diff --git a/mlir/lib/Bindings/Python/IRModules.cpp b/mlir/lib/Bindings/Python/IRModules.cpp index 70c1a28e92bef..149e231aed0b4 100644 --- a/mlir/lib/Bindings/Python/IRModules.cpp +++ b/mlir/lib/Bindings/Python/IRModules.cpp @@ -516,30 +516,269 @@ class PyComplexType : public PyConcreteType { } }; -/// Vector Type subclass - VectorType. -class PyVectorType : public PyConcreteType { +class PyShapedType : public PyConcreteType { public: - static constexpr IsAFunctionTy isaFunction = mlirTypeIsAVector; - static constexpr const char *pyClassName = "VectorType"; + static constexpr IsAFunctionTy isaFunction = mlirTypeIsAShaped; + static constexpr const char *pyClassName = "ShapedType"; using PyConcreteType::PyConcreteType; static void bindDerived(ClassTy &c) { + c.def_property_readonly( + "element_type", + [](PyShapedType &self) { + MlirType t = mlirShapedTypeGetElementType(self.type); + return PyType(t); + }, + py::keep_alive<0, 1>(), "Returns the element type of the shaped type."); + c.def_property_readonly( + "has_rank", + [](PyShapedType &self) -> bool { + return mlirShapedTypeHasRank(self.type); + }, + "Returns whether the given shaped type is ranked."); + c.def_property_readonly( + "rank", + [](PyShapedType &self) { + self.requireHasRank(); + return mlirShapedTypeGetRank(self.type); + }, + "Returns the rank of the given ranked shaped type."); + c.def_property_readonly( + "has_static_shape", + [](PyShapedType &self) -> bool { + return mlirShapedTypeHasStaticShape(self.type); + }, + "Returns whether the 
given shaped type has a static shape."); + c.def( + "is_dynamic_dim", + [](PyShapedType &self, intptr_t dim) -> bool { + self.requireHasRank(); + return mlirShapedTypeIsDynamicDim(self.type, dim); + }, + "Returns whether the dim-th dimension of the given shaped type is " + "dynamic."); + c.def( + "get_dim_size", + [](PyShapedType &self, intptr_t dim) { + self.requireHasRank(); + return mlirShapedTypeGetDimSize(self.type, dim); + }, + "Returns the dim-th dimension of the given ranked shaped type."); c.def_static( - "get_vector", - [](std::vector shape, PyType &elementType) { - // The element must be a floating point or integer scalar type. - if (mlirTypeIsAIntegerOrFloat(elementType.type)) { - MlirType t = - mlirVectorTypeGet(shape.size(), shape.data(), elementType.type); - return PyVectorType(t); - } - throw SetPyError( - PyExc_ValueError, - llvm::Twine("invalid '") + - py::repr(py::cast(elementType)).cast() + - "' and expected floating point or integer type."); + "is_dynamic_size", + [](int64_t size) -> bool { return mlirShapedTypeIsDynamicSize(size); }, + "Returns whether the given dimension size indicates a dynamic " + "dimension."); + c.def( + "is_dynamic_stride_or_offset", + [](PyShapedType &self, int64_t val) -> bool { + self.requireHasRank(); + return mlirShapedTypeIsDynamicStrideOrOffset(val); }, - py::keep_alive<0, 2>(), "Create a vector type"); + "Returns whether the given value is used as a placeholder for dynamic " + "strides and offsets in shaped types."); + } + +private: + void requireHasRank() { + if (!mlirShapedTypeHasRank(type)) { + throw SetPyError( + PyExc_ValueError, + "calling this method requires that the type has a rank."); + } + } +}; + +/// Vector Type subclass - VectorType. 
+class PyVectorType : public PyShapedType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsAVector; + static constexpr const char *pyClassName = "VectorType"; + using PyShapedType::PyShapedType; + // TODO: Switch back to bindDerived by making the ClassTy modifiable by + // subclasses, exposing the ShapedType hierarchy. + static void bind(py::module &m) { + py::class_(m, pyClassName) + .def(py::init(), py::keep_alive<0, 1>()) + .def_static( + "get_vector", + // TODO: Make the location optional and create a default location. + [](std::vector shape, PyType &elementType, + PyLocation &loc) { + MlirType t = mlirVectorTypeGetChecked(shape.size(), shape.data(), + elementType.type, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point or integer type."); + } + return PyVectorType(t); + }, + py::keep_alive<0, 2>(), "Create a vector type"); + } +}; + +/// Ranked Tensor Type subclass - RankedTensorType. +class PyRankedTensorType : public PyShapedType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsARankedTensor; + static constexpr const char *pyClassName = "RankedTensorType"; + using PyShapedType::PyShapedType; + // TODO: Switch back to bindDerived by making the ClassTy modifiable by + // subclasses, exposing the ShapedType hierarchy. + static void bind(py::module &m) { + py::class_(m, pyClassName) + .def(py::init(), py::keep_alive<0, 1>()) + .def_static( + "get_ranked_tensor", + // TODO: Make the location optional and create a default location. + [](std::vector shape, PyType &elementType, + PyLocation &loc) { + MlirType t = mlirRankedTensorTypeGetChecked( + shape.size(), shape.data(), elementType.type, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. 
+ if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyRankedTensorType(t); + }, + py::keep_alive<0, 2>(), "Create a ranked tensor type"); + } +}; + +/// Unranked Tensor Type subclass - UnrankedTensorType. +class PyUnrankedTensorType : public PyShapedType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsAUnrankedTensor; + static constexpr const char *pyClassName = "UnrankedTensorType"; + using PyShapedType::PyShapedType; + // TODO: Switch back to bindDerived by making the ClassTy modifiable by + // subclasses, exposing the ShapedType hierarchy. + static void bind(py::module &m) { + py::class_(m, pyClassName) + .def(py::init(), py::keep_alive<0, 1>()) + .def_static( + "get_unranked_tensor", + // TODO: Make the location optional and create a default location. + [](PyType &elementType, PyLocation &loc) { + MlirType t = + mlirUnrankedTensorTypeGetChecked(elementType.type, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyUnrankedTensorType(t); + }, + py::keep_alive<0, 1>(), "Create a unranked tensor type"); + } +}; + +/// Ranked MemRef Type subclass - MemRefType. +class PyMemRefType : public PyShapedType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsARankedTensor; + static constexpr const char *pyClassName = "MemRefType"; + using PyShapedType::PyShapedType; + // TODO: Switch back to bindDerived by making the ClassTy modifiable by + // subclasses, exposing the ShapedType hierarchy. 
+ static void bind(py::module &m) { + py::class_(m, pyClassName) + .def(py::init(), py::keep_alive<0, 1>()) + // TODO: Add mlirMemRefTypeGet and mlirMemRefTypeGetAffineMap binding + // once the affine map binding is completed. + .def_static( + "get_contiguous_memref", + // TODO: Make the location optional and create a default location. + [](PyType &elementType, std::vector shape, + unsigned memorySpace, PyLocation &loc) { + MlirType t = mlirMemRefTypeContiguousGetChecked( + elementType.type, shape.size(), shape.data(), memorySpace, + loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyMemRefType(t); + }, + py::keep_alive<0, 1>(), "Create a memref type") + .def_property_readonly( + "num_affine_maps", + [](PyMemRefType &self) -> intptr_t { + return mlirMemRefTypeGetNumAffineMaps(self.type); + }, + "Returns the number of affine layout maps in the given MemRef " + "type.") + .def_property_readonly( + "memory_space", + [](PyMemRefType &self) -> unsigned { + return mlirMemRefTypeGetMemorySpace(self.type); + }, + "Returns the memory space of the given MemRef type."); + } +}; + +/// Unranked MemRef Type subclass - UnrankedMemRefType. +class PyUnrankedMemRefType : public PyShapedType { +public: + static constexpr IsAFunctionTy isaFunction = mlirTypeIsAUnrankedMemRef; + static constexpr const char *pyClassName = "UnrankedMemRefType"; + using PyShapedType::PyShapedType; + // TODO: Switch back to bindDerived by making the ClassTy modifiable by + // subclasses, exposing the ShapedType hierarchy. + static void bind(py::module &m) { + py::class_(m, pyClassName) + .def(py::init(), py::keep_alive<0, 1>()) + .def_static( + "get_unranked_memref", + // TODO: Make the location optional and create a default location. 
+ [](PyType &elementType, unsigned memorySpace, PyLocation &loc) { + MlirType t = mlirUnrankedMemRefTypeGetChecked( + elementType.type, memorySpace, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyUnrankedMemRefType(t); + }, + py::keep_alive<0, 1>(), "Create a unranked memref type") + .def_property_readonly( + "memory_space", + [](PyUnrankedMemRefType &self) -> unsigned { + return mlirUnrankedMemrefGetMemorySpace(self.type); + }, + "Returns the memory space of the given Unranked MemRef type."); } }; @@ -886,6 +1125,11 @@ void mlir::python::populateIRSubmodule(py::module &m) { PyF64Type::bind(m); PyNoneType::bind(m); PyComplexType::bind(m); + PyShapedType::bind(m); PyVectorType::bind(m); + PyRankedTensorType::bind(m); + PyUnrankedTensorType::bind(m); + PyMemRefType::bind(m); + PyUnrankedMemRefType::bind(m); PyTupleType::bind(m); } diff --git a/mlir/lib/CAPI/IR/StandardTypes.cpp b/mlir/lib/CAPI/IR/StandardTypes.cpp index eb006242e8808..ddd3a5e93147a 100644 --- a/mlir/lib/CAPI/IR/StandardTypes.cpp +++ b/mlir/lib/CAPI/IR/StandardTypes.cpp @@ -168,6 +168,13 @@ MlirType mlirVectorTypeGet(intptr_t rank, int64_t *shape, unwrap(elementType))); } +MlirType mlirVectorTypeGetChecked(intptr_t rank, int64_t *shape, + MlirType elementType, MlirLocation loc) { + return wrap(VectorType::getChecked( + llvm::makeArrayRef(shape, static_cast(rank)), unwrap(elementType), + unwrap(loc))); +} + /* ========================================================================== */ /* Ranked / Unranked tensor type. 
*/ /* ========================================================================== */ @@ -189,10 +196,23 @@ MlirType mlirRankedTensorTypeGet(intptr_t rank, int64_t *shape, unwrap(elementType))); } +MlirType mlirRankedTensorTypeGetChecked(intptr_t rank, int64_t *shape, + MlirType elementType, + MlirLocation loc) { + return wrap(RankedTensorType::getChecked( + llvm::makeArrayRef(shape, static_cast(rank)), unwrap(elementType), + unwrap(loc))); +} + MlirType mlirUnrankedTensorTypeGet(MlirType elementType) { return wrap(UnrankedTensorType::get(unwrap(elementType))); } +MlirType mlirUnrankedTensorTypeGetChecked(MlirType elementType, + MlirLocation loc) { + return wrap(UnrankedTensorType::getChecked(unwrap(elementType), unwrap(loc))); +} + /* ========================================================================== */ /* Ranked / Unranked MemRef type. */ /* ========================================================================== */ @@ -216,6 +236,15 @@ MlirType mlirMemRefTypeContiguousGet(MlirType elementType, intptr_t rank, unwrap(elementType), llvm::None, memorySpace)); } +MlirType mlirMemRefTypeContiguousGetChecked(MlirType elementType, intptr_t rank, + int64_t *shape, + unsigned memorySpace, + MlirLocation loc) { + return wrap(MemRefType::getChecked( + llvm::makeArrayRef(shape, static_cast(rank)), unwrap(elementType), + llvm::None, memorySpace, unwrap(loc))); +} + intptr_t mlirMemRefTypeGetNumAffineMaps(MlirType type) { return static_cast( unwrap(type).cast().getAffineMaps().size()); @@ -237,6 +266,13 @@ MlirType mlirUnrankedMemRefTypeGet(MlirType elementType, unsigned memorySpace) { return wrap(UnrankedMemRefType::get(unwrap(elementType), memorySpace)); } +MlirType mlirUnrankedMemRefTypeGetChecked(MlirType elementType, + unsigned memorySpace, + MlirLocation loc) { + return wrap(UnrankedMemRefType::getChecked(unwrap(elementType), memorySpace, + unwrap(loc))); +} + unsigned mlirUnrankedMemrefGetMemorySpace(MlirType type) { return unwrap(type).cast().getMemorySpace(); 
} diff --git a/mlir/test/Bindings/Python/ir_types.py b/mlir/test/Bindings/Python/ir_types.py index a8f3a3840497a..00cd595843aa6 100644 --- a/mlir/test/Bindings/Python/ir_types.py +++ b/mlir/test/Bindings/Python/ir_types.py @@ -177,25 +177,187 @@ def testComplexType(): run(testComplexType) +# CHECK-LABEL: TEST: testShapedType +# Shaped type is not a kind of standard types, it is the base class for +# vectors, memrefs and tensors, so this test case uses an instance of vector +# to test the shaped type. +def testShapedType(): + ctx = mlir.ir.Context() + vector = mlir.ir.VectorType(ctx.parse_type("vector<2x3xf32>")) + # CHECK: element type: f32 + print("element type:", vector.element_type) + # CHECK: whether the given shaped type is ranked: True + print("whether the given shaped type is ranked:", vector.has_rank) + # CHECK: rank: 2 + print("rank:", vector.rank) + # CHECK: whether the shaped type has a static shape: True + print("whether the shaped type has a static shape:", vector.has_static_shape) + # CHECK: whether the dim-th dimension is dynamic: False + print("whether the dim-th dimension is dynamic:", vector.is_dynamic_dim(0)) + # CHECK: dim size: 3 + print("dim size:", vector.get_dim_size(1)) + # CHECK: False + print(vector.is_dynamic_size(3)) + # CHECK: False + print(vector.is_dynamic_stride_or_offset(1)) + +run(testShapedType) + # CHECK-LABEL: TEST: testVectorType def testVectorType(): ctx = mlir.ir.Context() f32 = mlir.ir.F32Type(ctx) shape = [2, 3] + loc = ctx.get_unknown_location() # CHECK: vector type: vector<2x3xf32> - print("vector type:", mlir.ir.VectorType.get_vector(shape, f32)) + print("vector type:", mlir.ir.VectorType.get_vector(shape, f32, loc)) - index = mlir.ir.IndexType(ctx) + none = mlir.ir.NoneType(ctx) try: - vector_invalid = mlir.ir.VectorType.get_vector(shape, index) + vector_invalid = mlir.ir.VectorType.get_vector(shape, none, loc) except ValueError as e: - # CHECK: invalid 'Type(index)' and expected floating point or integer type. 
+ # CHECK: invalid 'Type(none)' and expected floating point or integer type. print(e) else: print("Exception not produced") run(testVectorType) +# CHECK-LABEL: TEST: testRankedTensorType +def testRankedTensorType(): + ctx = mlir.ir.Context() + f32 = mlir.ir.F32Type(ctx) + shape = [2, 3] + loc = ctx.get_unknown_location() + # CHECK: ranked tensor type: tensor<2x3xf32> + print("ranked tensor type:", + mlir.ir.RankedTensorType.get_ranked_tensor(shape, f32, loc)) + + none = mlir.ir.NoneType(ctx) + try: + tensor_invalid = mlir.ir.RankedTensorType.get_ranked_tensor(shape, none, + loc) + except ValueError as e: + # CHECK: invalid 'Type(none)' and expected floating point, integer, vector + # CHECK: or complex type. + print(e) + else: + print("Exception not produced") + +run(testRankedTensorType) + +# CHECK-LABEL: TEST: testUnrankedTensorType +def testUnrankedTensorType(): + ctx = mlir.ir.Context() + f32 = mlir.ir.F32Type(ctx) + loc = ctx.get_unknown_location() + unranked_tensor = mlir.ir.UnrankedTensorType.get_unranked_tensor(f32, loc) + # CHECK: unranked tensor type: tensor<*xf32> + print("unranked tensor type:", unranked_tensor) + try: + invalid_rank = unranked_tensor.rank + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. + print(e) + else: + print("Exception not produced") + try: + invalid_is_dynamic_dim = unranked_tensor.is_dynamic_dim(0) + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. + print(e) + else: + print("Exception not produced") + try: + invalid_get_dim_size = unranked_tensor.get_dim_size(1) + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. 
+ print(e) + else: + print("Exception not produced") + + none = mlir.ir.NoneType(ctx) + try: + tensor_invalid = mlir.ir.UnrankedTensorType.get_unranked_tensor(none, loc) + except ValueError as e: + # CHECK: invalid 'Type(none)' and expected floating point, integer, vector + # CHECK: or complex type. + print(e) + else: + print("Exception not produced") + +run(testUnrankedTensorType) + +# CHECK-LABEL: TEST: testMemRefType +def testMemRefType(): + ctx = mlir.ir.Context() + f32 = mlir.ir.F32Type(ctx) + shape = [2, 3] + loc = ctx.get_unknown_location() + memref = mlir.ir.MemRefType.get_contiguous_memref(f32, shape, 2, loc) + # CHECK: memref type: memref<2x3xf32, 2> + print("memref type:", memref) + # CHECK: number of affine layout maps: 0 + print("number of affine layout maps:", memref.num_affine_maps) + # CHECK: memory space: 2 + print("memory space:", memref.memory_space) + + none = mlir.ir.NoneType(ctx) + try: + memref_invalid = mlir.ir.MemRefType.get_contiguous_memref(none, shape, 2, + loc) + except ValueError as e: + # CHECK: invalid 'Type(none)' and expected floating point, integer, vector + # CHECK: or complex type. + print(e) + else: + print("Exception not produced") + +run(testMemRefType) + +# CHECK-LABEL: TEST: testUnrankedMemRefType +def testUnrankedMemRefType(): + ctx = mlir.ir.Context() + f32 = mlir.ir.F32Type(ctx) + loc = ctx.get_unknown_location() + unranked_memref = mlir.ir.UnrankedMemRefType.get_unranked_memref(f32, 2, loc) + # CHECK: unranked memref type: memref<*xf32, 2> + print("unranked memref type:", unranked_memref) + try: + invalid_rank = unranked_memref.rank + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. + print(e) + else: + print("Exception not produced") + try: + invalid_is_dynamic_dim = unranked_memref.is_dynamic_dim(0) + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. 
+ print(e) + else: + print("Exception not produced") + try: + invalid_get_dim_size = unranked_memref.get_dim_size(1) + except ValueError as e: + # CHECK: calling this method requires that the type has a rank. + print(e) + else: + print("Exception not produced") + + none = mlir.ir.NoneType(ctx) + try: + memref_invalid = mlir.ir.UnrankedMemRefType.get_unranked_memref(none, 2, + loc) + except ValueError as e: + # CHECK: invalid 'Type(none)' and expected floating point, integer, vector + # CHECK: or complex type. + print(e) + else: + print("Exception not produced") + +run(testUnrankedMemRefType) + # CHECK-LABEL: TEST: testTupleType def testTupleType(): ctx = mlir.ir.Context() From 16bb71fd4f898d296397336ecb81b79a7297933c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 6 Sep 2020 20:14:58 +0100 Subject: [PATCH 288/465] [DSE,MemorySSA] Add a few additional debug messages. --- .../Scalar/DeadStoreElimination.cpp | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 0296d20bc07b9..109e15d6d7cfc 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1828,29 +1828,42 @@ struct DSEState { MemoryAccess *Current = StartAccess; Instruction *KillingI = KillingDef->getMemoryInst(); bool StepAgain; - LLVM_DEBUG(dbgs() << " trying to get dominating access for " - << *StartAccess << "\n"); + LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); // Find the next clobbering Mod access for DefLoc, starting at StartAccess. do { StepAgain = false; + LLVM_DEBUG({ + dbgs() << " visiting " << *Current; + if (!MSSA.isLiveOnEntryDef(Current) && isa(Current)) + dbgs() << " (" << *cast(Current)->getMemoryInst() + << ")"; + dbgs() << "\n"; + }); + // Reached TOP. - if (MSSA.isLiveOnEntryDef(Current)) + if (MSSA.isLiveOnEntryDef(Current)) { + LLVM_DEBUG(dbgs() << " ... 
found LiveOnEntryDef\n"); return None; + } // Cost of a step. Accesses in the same block are more likely to be valid // candidates for elimination, hence consider them cheaper. unsigned StepCost = KillingDef->getBlock() == Current->getBlock() ? MemorySSASameBBStepCost : MemorySSAOtherBBStepCost; - if (WalkerStepLimit <= StepCost) + if (WalkerStepLimit <= StepCost) { + LLVM_DEBUG(dbgs() << " ... hit walker step limit\n"); return None; + } WalkerStepLimit -= StepCost; // Return for MemoryPhis. They cannot be eliminated directly and the // caller is responsible for traversing them. - if (isa(Current)) + if (isa(Current)) { + LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n"); return Current; + } // Below, check if CurrentDef is a valid candidate to be eliminated by // KillingDef. If it is not, check the next candidate. From 8542dab909f895a8b6812428bb5e1acf7ea15305 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Sun, 6 Sep 2020 15:54:24 -0400 Subject: [PATCH 289/465] [compiler-rt] Implement __clear_cache() on OpenBSD/arm --- compiler-rt/lib/builtins/clear_cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c index 72e02e613de50..29e31f55d4998 100644 --- a/compiler-rt/lib/builtins/clear_cache.c +++ b/compiler-rt/lib/builtins/clear_cache.c @@ -33,7 +33,7 @@ uintptr_t GetCurrentProcess(void); #include #endif -#if defined(__OpenBSD__) && defined(__mips__) +#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__)) // clang-format off #include #include @@ -58,7 +58,7 @@ void __clear_cache(void *start, void *end) { #elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__)) FlushInstructionCache(GetCurrentProcess(), start, end - start); #elif defined(__arm__) && !defined(__APPLE__) -#if defined(__FreeBSD__) || defined(__NetBSD__) +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) struct arm_sync_icache_args arg; arg.addr = 
(uintptr_t)start; From b536cbaac5f85a3a1ab8c971c300cd27e5603fda Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 6 Sep 2020 23:14:05 +0200 Subject: [PATCH 290/465] [ValueTracking] Avoid known bits fallback for non-zero get check (NFCI) The known bits fall back will never be able to infer a non-null value here, so don't bother. --- llvm/lib/Analysis/ValueTracking.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 3f7cf296ac1c5..3a6ee355c646d 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2413,8 +2413,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well // as casts that can alter the value, e.g., AddrSpaceCasts. if (const GEPOperator *GEP = dyn_cast(V)) - if (isGEPKnownNonNull(GEP, Depth, Q)) - return true; + return isGEPKnownNonNull(GEP, Depth, Q); if (auto *BCO = dyn_cast(V)) return isKnownNonZero(BCO->getOperand(0), Depth, Q); From ab68517e6b7e51b84c4b0e813a30258ec1ce5da5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 6 Sep 2020 14:44:36 -0700 Subject: [PATCH 291/465] [asan_symbolize] Pass --demangle/--no-demangle instead of --demangle={True,False} `--demangle={True,False}` were accepted but disallowed after llvm-symbolizer's switch to OptTable. (`--demangle={true,false}` were temporarily supported but they are case sensitive.) 
--- compiler-rt/lib/asan/scripts/asan_symbolize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py index 1e06e0d56fb94..1e78cb1b0e77a 100755 --- a/compiler-rt/lib/asan/scripts/asan_symbolize.py +++ b/compiler-rt/lib/asan/scripts/asan_symbolize.py @@ -89,7 +89,7 @@ def __init__(self, symbolizer_path, default_arch, system, dsym_hints=[]): def open_llvm_symbolizer(self): cmd = [self.symbolizer_path, - '--demangle=%s' % demangle, + ('--demangle' if demangle else '--no-demangle'), '--functions=linkage', '--inlines', '--default-arch=%s' % self.default_arch] From efa57f9a7adb11a14b4e0d930f49070c769fa6ac Mon Sep 17 00:00:00 2001 From: Amy Kwan Date: Thu, 3 Sep 2020 12:33:53 -0500 Subject: [PATCH 292/465] [PowerPC] Implement Vector Expand Mask builtins in LLVM/Clang This patch implements the vec_expandm function prototypes in altivec.h in order to utilize the vector expand with mask instructions introduced in Power10. Differential Revision: https://reviews.llvm.org/D82727 --- clang/include/clang/Basic/BuiltinsPPC.def | 7 +++ clang/lib/Headers/altivec.h | 27 +++++++++ clang/test/CodeGen/builtins-ppc-p10vector.c | 30 ++++++++++ llvm/include/llvm/IR/IntrinsicsPowerPC.td | 12 ++++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 15 +++-- .../CodeGen/PowerPC/p10-vector-mask-ops.ll | 56 +++++++++++++++++++ 6 files changed, 142 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 57ef39980c9b6..89dd03075b28f 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -322,6 +322,13 @@ BUILTIN(__builtin_altivec_vmulhuw, "V4UiV4UiV4Ui", "") BUILTIN(__builtin_altivec_vmulhsd, "V2LLiV2LLiV2LLi", "") BUILTIN(__builtin_altivec_vmulhud, "V2ULLiV2ULLiV2ULLi", "") +// P10 Vector Expand with Mask built-ins. 
+BUILTIN(__builtin_altivec_vexpandbm, "V16UcV16Uc", "") +BUILTIN(__builtin_altivec_vexpandhm, "V8UsV8Us", "") +BUILTIN(__builtin_altivec_vexpandwm, "V4UiV4Ui", "") +BUILTIN(__builtin_altivec_vexpanddm, "V2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vexpandqm, "V1ULLLiV1ULLLi", "") + // P10 Vector Parallel Bits built-ins. BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "") diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index a7c4fd23ef19b..22744adefbefd 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -17041,6 +17041,33 @@ vec_extractm(vector unsigned __int128 __a) { return __builtin_altivec_vextractqm(__a); } +/* vec_expandm */ + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_expandm(vector unsigned char __a) { + return __builtin_altivec_vexpandbm(__a); +} + +static __inline__ vector unsigned short __ATTRS_o_ai +vec_expandm(vector unsigned short __a) { + return __builtin_altivec_vexpandhm(__a); +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_expandm(vector unsigned int __a) { + return __builtin_altivec_vexpandwm(__a); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_expandm(vector unsigned long long __a) { + return __builtin_altivec_vexpanddm(__a); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_expandm(vector unsigned __int128 __a) { + return __builtin_altivec_vexpandqm(__a); +} + /* vec_pdep */ static __inline__ vector unsigned long long __ATTRS_o_ai diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c index c850ebd1c70f8..ad63d646196c3 100644 --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -201,6 +201,36 @@ vector unsigned long long test_vcfuged(void) { return vec_cfuge(vulla, vullb); } +vector unsigned char test_vec_expandm_uc(void) { + // CHECK: @llvm.ppc.altivec.vexpandbm(<16 
x i8> %{{.+}}) + // CHECK-NEXT: ret <16 x i8> + return vec_expandm(vuca); +} + +vector unsigned short test_vec_expandm_us(void) { + // CHECK: @llvm.ppc.altivec.vexpandhm(<8 x i16> %{{.+}}) + // CHECK-NEXT: ret <8 x i16> + return vec_expandm(vusa); +} + +vector unsigned int test_vec_expandm_ui(void) { + // CHECK: @llvm.ppc.altivec.vexpandwm(<4 x i32> %{{.+}}) + // CHECK-NEXT: ret <4 x i32> + return vec_expandm(vuia); +} + +vector unsigned long long test_vec_expandm_ull(void) { + // CHECK: @llvm.ppc.altivec.vexpanddm(<2 x i64> %{{.+}}) + // CHECK-NEXT: ret <2 x i64> + return vec_expandm(vulla); +} + +vector unsigned __int128 test_vec_expandm_u128(void) { + // CHECK: @llvm.ppc.altivec.vexpandqm(<1 x i128> %{{.+}}) + // CHECK-NEXT: ret <1 x i128> + return vec_expandm(vui128a); +} + unsigned long long test_vgnb_1(void) { // CHECK: @llvm.ppc.altivec.vgnb(<1 x i128> %{{.+}}, i32 2) // CHECK-NEXT: ret i64 diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 4ead968a19752..73a49ec77f8b4 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -455,6 +455,18 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">, Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>; + // P10 Vector Expand with Mask + def int_ppc_altivec_vexpandbm : GCCBuiltin<"__builtin_altivec_vexpandbm">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpandhm : GCCBuiltin<"__builtin_altivec_vexpandhm">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpandwm : GCCBuiltin<"__builtin_altivec_vexpandwm">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpanddm : GCCBuiltin<"__builtin_altivec_vexpanddm">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpandqm : GCCBuiltin<"__builtin_altivec_vexpandqm">, + Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty], [IntrNoMem]>; + // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins. def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index 8032656135a44..73321dec99d37 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1003,19 +1003,24 @@ let Predicates = [IsISA3_1] in { (int_ppc_altivec_vextractqm v1i128:$vB))]>; def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB), "vexpandbm $vD, $vB", IIC_VecGeneral, - []>; + [(set v16i8:$vD, (int_ppc_altivec_vexpandbm + v16i8:$vB))]>; def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB), "vexpandhm $vD, $vB", IIC_VecGeneral, - []>; + [(set v8i16:$vD, (int_ppc_altivec_vexpandhm + v8i16:$vB))]>; def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB), "vexpandwm $vD, $vB", IIC_VecGeneral, - []>; + [(set v4i32:$vD, (int_ppc_altivec_vexpandwm + v4i32:$vB))]>; def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs 
vrrc:$vD), (ins vrrc:$vB), "vexpanddm $vD, $vB", IIC_VecGeneral, - []>; + [(set v2i64:$vD, (int_ppc_altivec_vexpanddm + v2i64:$vB))]>; def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB), "vexpandqm $vD, $vB", IIC_VecGeneral, - []>; + [(set v1i128:$vD, (int_ppc_altivec_vexpandqm + v1i128:$vB))]>; def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB), "mtvsrbm $vD, $rB", IIC_VecGeneral, []>; diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll index 2b1cf27c20ec9..637361f7b1c96 100644 --- a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll +++ b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll @@ -64,3 +64,59 @@ entry: %ext = tail call i32 @llvm.ppc.altivec.vextractqm(<1 x i128> %a) ret i32 %ext } + +declare <16 x i8> @llvm.ppc.altivec.vexpandbm(<16 x i8>) +declare <8 x i16> @llvm.ppc.altivec.vexpandhm(<8 x i16>) +declare <4 x i32> @llvm.ppc.altivec.vexpandwm(<4 x i32>) +declare <2 x i64> @llvm.ppc.altivec.vexpanddm(<2 x i64>) +declare <1 x i128> @llvm.ppc.altivec.vexpandqm(<1 x i128>) + +define <16 x i8> @test_vexpandbm(<16 x i8> %a) { +; CHECK-LABEL: test_vexpandbm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexpandbm v2, v2 +; CHECK-NEXT: blr +entry: + %exp = tail call <16 x i8> @llvm.ppc.altivec.vexpandbm(<16 x i8> %a) + ret <16 x i8> %exp +} + +define <8 x i16> @test_vexpandhm(<8 x i16> %a) { +; CHECK-LABEL: test_vexpandhm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexpandhm v2, v2 +; CHECK-NEXT: blr +entry: + %exp = tail call <8 x i16> @llvm.ppc.altivec.vexpandhm(<8 x i16> %a) + ret <8 x i16> %exp +} + +define <4 x i32> @test_vexpandwm(<4 x i32> %a) { +; CHECK-LABEL: test_vexpandwm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexpandwm v2, v2 +; CHECK-NEXT: blr +entry: + %exp = tail call <4 x i32> @llvm.ppc.altivec.vexpandwm(<4 x i32> %a) + ret <4 x i32> %exp +} + +define <2 x i64> @test_vexpanddm(<2 x i64> %a) { +; CHECK-LABEL: test_vexpanddm: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: vexpanddm v2, v2 +; CHECK-NEXT: blr +entry: + %exp = tail call <2 x i64> @llvm.ppc.altivec.vexpanddm(<2 x i64> %a) + ret <2 x i64> %exp +} + +define <1 x i128> @test_vexpandqm(<1 x i128> %a) { +; CHECK-LABEL: test_vexpandqm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexpandqm v2, v2 +; CHECK-NEXT: blr +entry: + %exp = tail call <1 x i128> @llvm.ppc.altivec.vexpandqm(<1 x i128> %a) + ret <1 x i128> %exp +} From caee15a0ed52471bd329d01dc253ec9be3936c6d Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Sun, 6 Sep 2020 15:42:21 -0700 Subject: [PATCH 293/465] [WebAssembly] Fix incorrect assumption of simple value types Fixes PR47375, in which an assertion was triggering because WebAssemblyTargetLowering::isVectorLoadExtDesirable was improperly assuming the use of simple value types. Differential Revision: https://reviews.llvm.org/D87110 --- .../WebAssembly/WebAssemblyISelLowering.cpp | 4 +-- llvm/test/CodeGen/WebAssembly/pr47375.ll | 36 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/pr47375.ll diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 822b64d3de105..8f5b7301e6532 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -620,8 +620,8 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, } bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { - MVT ExtT = ExtVal.getSimpleValueType(); - MVT MemT = cast(ExtVal->getOperand(0))->getSimpleValueType(0); + EVT ExtT = ExtVal.getValueType(); + EVT MemT = cast(ExtVal->getOperand(0))->getValueType(0); return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) || (ExtT == MVT::v4i32 && MemT == MVT::v4i16) || (ExtT == MVT::v2i64 && MemT == MVT::v2i32); diff --git a/llvm/test/CodeGen/WebAssembly/pr47375.ll b/llvm/test/CodeGen/WebAssembly/pr47375.ll 
new file mode 100644 index 0000000000000..4c04631f26b11 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/pr47375.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; Regression test for pr47375, in which an assertion was triggering +; because WebAssemblyTargetLowering::isVectorLoadExtDesirable was +; improperly assuming the use of simple value types. + +define void @sext_vec() { +; CHECK-LABEL: sext_vec: +; CHECK: .functype sext_vec () -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.load8_u 0 +; CHECK-NEXT: local.set 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.store8 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 7 +; CHECK-NEXT: i32.shl +; CHECK-NEXT: i32.or +; CHECK-NEXT: i32.const 7175 +; CHECK-NEXT: i32.and +; CHECK-NEXT: i32.store16 0 +; CHECK-NEXT: # fallthrough-return + %L1 = load <2 x i3>, <2 x i3>* undef, align 2 + %zext = zext <2 x i3> %L1 to <2 x i10> + store <2 x i10> %zext, <2 x i10>* undef, align 4 + ret void +} From d5c45041f1465f4ecc3828efbbb27aa7b4d23d89 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Sun, 6 Sep 2020 21:14:14 -0400 Subject: [PATCH 294/465] [machinesink] add testcase for more sinking - NFC --- .../PowerPC/sink-down-more-instructions.ll | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll new file mode 100644 index 0000000000000..c13d181519964 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s + +define signext i32 @foo(i32 signext %0, i32 signext %1, i32* %2, i32* %3, i32 signext %4) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpwi r7, 1 +; CHECK-NEXT: blt cr0, .LBB0_8 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: addi r4, r5, -4 +; CHECK-NEXT: addi r8, r6, -4 +; CHECK-NEXT: clrldi r7, r7, 32 +; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: mtctr r7 +; CHECK-NEXT: lis r7, -30584 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: cmplwi r3, 3 +; CHECK-NEXT: cmplwi cr1, r3, 1 +; CHECK-NEXT: ori r7, r7, 34953 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: mulhwu r9, r6, r7 +; CHECK-NEXT: srwi r9, r9, 4 +; CHECK-NEXT: mulli r9, r9, 30 +; CHECK-NEXT: sub r9, r6, r9 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: addi r6, r6, 1 +; CHECK-NEXT: add r9, r9, r5 +; CHECK-NEXT: stw r9, 4(r8) +; CHECK-NEXT: mr r8, r3 +; CHECK-NEXT: bdz .LBB0_8 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: lwzu r9, 4(r4) +; CHECK-NEXT: addi r3, r8, 4 +; CHECK-NEXT: add r5, r9, r5 +; CHECK-NEXT: beq cr0, .LBB0_7 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: bne cr1, .LBB0_2 +; CHECK-NEXT: # %bb.6: +; CHECK-NEXT: slwi r9, r6, 1 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_7: +; CHECK-NEXT: addi r9, r6, 100 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_8: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr + %6 = icmp sgt i32 %4, 0 + br i1 %6, label %7, label %9 + +7: ; preds = %5 + %8 = zext i32 %4 to i64 + br label %10 + +9: ; preds = %25, %5 + ret i32 undef + +10: ; preds = %7, %25 + %11 = phi i64 [ 0, %7 ], [ %29, %25 ] + %12 = phi i32 [ 0, %7 ], [ %30, %25 ] + %13 = phi i32 [ 0, %7 ], [ %16, %25 ] + %14 = getelementptr inbounds i32, i32* %2, i64 %11 + %15 = load i32, i32* %14, align 4 + %16 = add nsw i32 %15, %13 + switch i32 %0, label %22 [ + i32 1, label %17 + i32 3, label %20 + ] + +17: ; 
preds = %10 + %18 = trunc i64 %11 to i32 + %19 = shl i32 %18, 1 + br label %25 + +20: ; preds = %10 + %21 = add nuw nsw i32 %12, 100 + br label %25 + +22: ; preds = %10 + %23 = trunc i64 %11 to i32 + %24 = urem i32 %23, 30 + br label %25 + +25: ; preds = %22, %20, %17 + %26 = phi i32 [ %24, %22 ], [ %21, %20 ], [ %19, %17 ] + %27 = add nsw i32 %26, %16 + %28 = getelementptr inbounds i32, i32* %3, i64 %11 + store i32 %27, i32* %28, align 4 + %29 = add nuw nsw i64 %11, 1 + %30 = add nuw nsw i32 %12, 1 + %31 = icmp eq i64 %29, %8 + br i1 %31, label %9, label %10 +} + From 69f2c79f2ad2c3ebdb000cb1311612db7bd2bef8 Mon Sep 17 00:00:00 2001 From: Zi Xuan Wu Date: Wed, 2 Sep 2020 17:02:17 +0800 Subject: [PATCH 295/465] [ELF] Add a new e_machine value EM_CSKY and add some CSKY relocation types This is the split part of D86269, which add a new ELF machine flag called EM_CSKY and related relocations. Some target-specific flags and tests for csky can be added in follow-up patches later. Differential Revision: https://reviews.llvm.org/D86610 --- llvm/include/llvm/BinaryFormat/ELF.h | 7 ++ .../llvm/BinaryFormat/ELFRelocs/CSKY.def | 74 +++++++++++++++++++ llvm/include/llvm/Object/ELFObjectFile.h | 4 + llvm/lib/Object/ELF.cpp | 9 +++ llvm/lib/ObjectYAML/ELFYAML.cpp | 4 + llvm/unittests/Object/ELFObjectFileTest.cpp | 15 ++++ 6 files changed, 113 insertions(+) create mode 100644 llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index a1eb4d0383fb2..8fa5646879c83 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -312,6 +312,7 @@ enum { EM_LANAI = 244, // Lanai 32-bit processor EM_BPF = 247, // Linux kernel bpf virtual machine EM_VE = 251, // NEC SX-Aurora VE + EM_CSKY = 252, // C-SKY 32-bit processor }; // Object file classes. 
@@ -772,6 +773,12 @@ enum { #include "ELFRelocs/VE.def" }; + +// ELF Relocation types for CSKY +enum { +#include "ELFRelocs/CSKY.def" +}; + #undef ELF_RELOC // Section header. diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def new file mode 100644 index 0000000000000..c5f2dbae8033c --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def @@ -0,0 +1,74 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_CKCORE_NONE, 0) +ELF_RELOC(R_CKCORE_ADDR32, 1) +ELF_RELOC(R_CKCORE_PCREL_IMM8_4, 2) +ELF_RELOC(R_CKCORE_PCREL_IMM11_2, 3) +ELF_RELOC(R_CKCORE_PCREL_IMM4_2, 4) +ELF_RELOC(R_CKCORE_PCREL32, 5) +ELF_RELOC(R_CKCORE_PCREL_JSR_IMM11_2, 6) +ELF_RELOC(R_CKCORE_GNU_VTINHERIT, 7) +ELF_RELOC(R_CKCORE_GNU_VTENTRY, 8) +ELF_RELOC(R_CKCORE_RELATIVE, 9) +ELF_RELOC(R_CKCORE_COPY, 10) +ELF_RELOC(R_CKCORE_GLOB_DAT, 11) +ELF_RELOC(R_CKCORE_JUMP_SLOT, 12) +ELF_RELOC(R_CKCORE_GOTOFF, 13) +ELF_RELOC(R_CKCORE_GOTPC, 14) +ELF_RELOC(R_CKCORE_GOT32, 15) +ELF_RELOC(R_CKCORE_PLT32, 16) +ELF_RELOC(R_CKCORE_ADDRGOT, 17) +ELF_RELOC(R_CKCORE_ADDRPLT, 18) +ELF_RELOC(R_CKCORE_PCREL_IMM26_2, 19) +ELF_RELOC(R_CKCORE_PCREL_IMM16_2, 20) +ELF_RELOC(R_CKCORE_PCREL_IMM16_4, 21) +ELF_RELOC(R_CKCORE_PCREL_IMM10_2, 22) +ELF_RELOC(R_CKCORE_PCREL_IMM10_4, 23) +ELF_RELOC(R_CKCORE_ADDR_HI16, 24) +ELF_RELOC(R_CKCORE_ADDR_LO16, 25) +ELF_RELOC(R_CKCORE_GOTPC_HI16, 26) +ELF_RELOC(R_CKCORE_GOTPC_LO16, 27) +ELF_RELOC(R_CKCORE_GOTOFF_HI16, 28) +ELF_RELOC(R_CKCORE_GOTOFF_LO16, 29) +ELF_RELOC(R_CKCORE_GOT12, 30) +ELF_RELOC(R_CKCORE_GOT_HI16, 31) +ELF_RELOC(R_CKCORE_GOT_LO16, 32) +ELF_RELOC(R_CKCORE_PLT12, 33) +ELF_RELOC(R_CKCORE_PLT_HI16, 34) +ELF_RELOC(R_CKCORE_PLT_LO16, 35) +ELF_RELOC(R_CKCORE_ADDRGOT_HI16, 36) +ELF_RELOC(R_CKCORE_ADDRGOT_LO16, 37) +ELF_RELOC(R_CKCORE_ADDRPLT_HI16, 38) +ELF_RELOC(R_CKCORE_ADDRPLT_LO16, 39) +ELF_RELOC(R_CKCORE_PCREL_JSR_IMM26_2, 40) +ELF_RELOC(R_CKCORE_TOFFSET_LO16, 41) 
+ELF_RELOC(R_CKCORE_DOFFSET_LO16, 42) +ELF_RELOC(R_CKCORE_PCREL_IMM18_2, 43) +ELF_RELOC(R_CKCORE_DOFFSET_IMM18, 44) +ELF_RELOC(R_CKCORE_DOFFSET_IMM18_2, 45) +ELF_RELOC(R_CKCORE_DOFFSET_IMM18_4, 46) +ELF_RELOC(R_CKCORE_GOTOFF_IMM18, 47) +ELF_RELOC(R_CKCORE_GOT_IMM18_4, 48) +ELF_RELOC(R_CKCORE_PLT_IMM18_4, 49) +ELF_RELOC(R_CKCORE_PCREL_IMM7_4, 50) +ELF_RELOC(R_CKCORE_TLS_LE32, 51) +ELF_RELOC(R_CKCORE_TLS_IE32, 52) +ELF_RELOC(R_CKCORE_TLS_GD32, 53) +ELF_RELOC(R_CKCORE_TLS_LDM32, 54) +ELF_RELOC(R_CKCORE_TLS_LDO32, 55) +ELF_RELOC(R_CKCORE_TLS_DTPMOD32, 56) +ELF_RELOC(R_CKCORE_TLS_DTPOFF32, 57) +ELF_RELOC(R_CKCORE_TLS_TPOFF32, 58) +ELF_RELOC(R_CKCORE_PCREL_FLRW_IMM8_4, 59) +ELF_RELOC(R_CKCORE_NOJSRI, 60) +ELF_RELOC(R_CKCORE_CALLGRAPH, 61) +ELF_RELOC(R_CKCORE_IRELATIVE, 62) +ELF_RELOC(R_CKCORE_PCREL_BLOOP_IMM4_4, 63) +ELF_RELOC(R_CKCORE_PCREL_BLOOP_IMM12_4, 64) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_1, 65) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_2, 66) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_4, 67) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_8, 68) diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 8049020f564b6..74d4745c1034f 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -1112,6 +1112,8 @@ StringRef ELFObjectFile::getFileFormatName() const { return "elf32-powerpc"; case ELF::EM_RISCV: return "elf32-littleriscv"; + case ELF::EM_CSKY: + return "elf32-csky"; case ELF::EM_SPARC: case ELF::EM_SPARC32PLUS: return "elf32-sparc"; @@ -1224,6 +1226,8 @@ template Triple::ArchType ELFObjectFile::getArch() const { case ELF::EM_VE: return Triple::ve; + case ELF::EM_CSKY: + return Triple::csky; default: return Triple::UnknownArch; } diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 0e06af92d64bc..c6e9ee175adc8 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -152,6 +152,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, break; } 
break; + case ELF::EM_CSKY: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/CSKY.def" + default: + break; + } + break; default: break; } @@ -194,6 +201,8 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) { case ELF::EM_SPARC32PLUS: case ELF::EM_SPARCV9: return ELF::R_SPARC_RELATIVE; + case ELF::EM_CSKY: + return ELF::R_CKCORE_RELATIVE; case ELF::EM_AMDGPU: break; case ELF::EM_BPF: diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 19f909e565644..2ebea1176a6f0 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -230,6 +230,7 @@ void ScalarEnumerationTraits::enumeration( ECase(EM_LANAI); ECase(EM_BPF); ECase(EM_VE); + ECase(EM_CSKY); #undef ECase IO.enumFallback(Value); } @@ -674,6 +675,9 @@ void ScalarEnumerationTraits::enumeration( case ELF::EM_VE: #include "llvm/BinaryFormat/ELFRelocs/VE.def" break; + case ELF::EM_CSKY: +#include "llvm/BinaryFormat/ELFRelocs/CSKY.def" + break; case ELF::EM_PPC64: #include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" break; diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index d4d9a05f8c4e2..ebbbae0af0936 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -282,3 +282,18 @@ TEST(ELFObjectFileTest, MachineTestForMSP430) { for (const DataForTest &D : generateData(ELF::EM_MSP430)) checkFormatAndArch(D, Formats[I++], Triple::msp430); } + +TEST(ELFObjectFileTest, MachineTestForCSKY) { + std::array Formats = {"elf32-csky", "elf32-csky", + "elf64-unknown", "elf64-unknown"}; + size_t I = 0; + for (const DataForTest &D : generateData(ELF::EM_CSKY)) + checkFormatAndArch(D, Formats[I++], Triple::csky); +} + + + +// ELF relative relocation type test. 
+TEST(ELFObjectFileTest, RelativeRelocationTypeTest) { + EXPECT_EQ(ELF::R_CKCORE_RELATIVE, getELFRelativeRelocationType(ELF::EM_CSKY)); +} From 9bee13f89085b08e4e8e24c51c11526fcef6efe1 Mon Sep 17 00:00:00 2001 From: Muhammad Omair Javaid Date: Mon, 7 Sep 2020 08:35:56 +0500 Subject: [PATCH 296/465] Move targetHasSVE function to lldbtest.py targetHasSVE helper function was added to test for availability of SVE support by connected platform. We now intend to use this function in other testcases and I am moving it to a generic location in lldbtest.py to allow usage by other upcoming testcases. Reviewed By: labath Differential Revision: https://reviews.llvm.org/D86872 --- .../Python/lldbsuite/test/lldbtest.py | 23 ++++++++++++++++ .../TestSVERegisters.py | 27 ++----------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index e1966692b03c7..73faa2aef5e4b 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1293,6 +1293,29 @@ def isPPC64le(self): return True return False + def isAArch64SVE(self): + triple = self.dbg.GetSelectedPlatform().GetTriple() + + # TODO other platforms, please implement this function + if not re.match(".*-.*-linux", triple): + return False + + # Need to do something different for non-Linux/Android targets + cpuinfo_path = self.getBuildArtifact("cpuinfo") + if configuration.lldb_platform_name: + self.runCmd('platform get-file "/proc/cpuinfo" ' + cpuinfo_path) + else: + cpuinfo_path = "/proc/cpuinfo" + + try: + f = open(cpuinfo_path, 'r') + cpuinfo = f.read() + f.close() + except: + return False + + return " sve " in cpuinfo + def getArchitecture(self): """Returns the architecture in effect the test suite is running with.""" module = builder_module() diff --git 
a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py index 42d30f6cb1137..b243a6692d852 100644 --- a/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py +++ b/lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py @@ -10,29 +10,6 @@ class RegisterCommandsTestCase(TestBase): - def targetHasSVE(self): - triple = self.dbg.GetSelectedPlatform().GetTriple() - - # TODO other platforms, please implement this function - if not re.match(".*-.*-linux", triple): - return False - - # Need to do something different for non-Linux/Android targets - cpuinfo_path = self.getBuildArtifact("cpuinfo") - if configuration.lldb_platform_name: - self.runCmd('platform get-file "/proc/cpuinfo" ' + cpuinfo_path) - else: - cpuinfo_path = "/proc/cpuinfo" - - try: - f = open(cpuinfo_path, 'r') - cpuinfo = f.read() - f.close() - except: - return False - - return " sve " in cpuinfo - def check_sve_register_size(self, set, name, expected): reg_value = set.GetChildMemberWithName(name) self.assertTrue(reg_value.IsValid(), @@ -53,7 +30,7 @@ def test_sve_registers_configuration(self): exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - if not self.targetHasSVE(): + if not self.isAArch64SVE(): self.skipTest('SVE registers must be supported.') lldbutil.run_break_set_by_file_and_line( @@ -108,7 +85,7 @@ def test_sve_registers_read_write(self): exe = self.getBuildArtifact("a.out") self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) - if not self.targetHasSVE(): + if not self.isAArch64SVE(): self.skipTest('SVE registers must be supported.') lldbutil.run_break_set_by_file_and_line( From 1fd7dc40748b17d8fc47ef1ebede9df61e973056 Mon Sep 17 00:00:00 2001 From: dongAxis Date: Mon, 7 Sep 2020 11:43:16 +0800 
Subject: [PATCH 297/465] When dumping results of StackLifetime, it will print the following log: BB [7, 8): begin {}, end {}, livein {}, liveout {} BB [1, 2): begin {}, end {}, livein {}, liveout {} ... But it is not convenient to know what the basic block is. So I add the basic block name to it. Reviewed By: vitalybuka TestPlan: check-llvm Differential Revision: https://reviews.llvm.org/D87152 --- llvm/lib/Analysis/StackLifetime.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp index f95a8918afbba..ab5f2db7d1cd9 100644 --- a/llvm/lib/Analysis/StackLifetime.cpp +++ b/llvm/lib/Analysis/StackLifetime.cpp @@ -292,7 +292,7 @@ LLVM_DUMP_METHOD void StackLifetime::dumpBlockLiveness() const { const BasicBlock *BB = IT.getFirst(); const BlockLifetimeInfo &BlockInfo = BlockLiveness.find(BB)->getSecond(); auto BlockRange = BlockInstRange.find(BB)->getSecond(); - dbgs() << " BB [" << BlockRange.first << ", " << BlockRange.second + dbgs() << " BB (" << BB->getName() << ") [" << BlockRange.first << ", " << BlockRange.second << "): begin " << BlockInfo.Begin << ", end " << BlockInfo.End << ", livein " << BlockInfo.LiveIn << ", liveout " << BlockInfo.LiveOut << "\n"; From 76953321666617bcace6b067ebdde92dd9313a92 Mon Sep 17 00:00:00 2001 From: Muhammad Omair Javaid Date: Mon, 7 Sep 2020 08:38:43 +0500 Subject: [PATCH 298/465] Move NativeRegisterContextLinux/RegisterContextPOSIX*_arm to RegisterInfoAndSetInterface This patch removes register set definitions and other redundant code from NativeRegisterContextLinux/RegisterContextPOSIX*_arm. Register sets are now moved under RegisterInfosPOSIX_arm which now uses RegisterInfoAndSetInterface. This is similar to what we earlier did for AArch64. 
Reviewed By: labath Differential Revision: https://reviews.llvm.org/D86962 --- .../Plugins/Process/FreeBSD/FreeBSDThread.cpp | 5 +- ...RegisterContextPOSIXProcessMonitor_arm.cpp | 6 +- .../RegisterContextPOSIXProcessMonitor_arm.h | 4 +- .../Linux/NativeRegisterContextLinux_arm.cpp | 94 +++---------- .../Linux/NativeRegisterContextLinux_arm.h | 34 +---- .../Utility/RegisterContextPOSIX_arm.cpp | 131 +++--------------- .../Utility/RegisterContextPOSIX_arm.h | 51 +------ .../Process/Utility/RegisterInfoPOSIX_arm.cpp | 104 +++++++++++++- .../Process/Utility/RegisterInfoPOSIX_arm.h | 15 +- .../elf-core/RegisterContextPOSIXCore_arm.cpp | 4 +- .../elf-core/RegisterContextPOSIXCore_arm.h | 2 +- .../Process/elf-core/ThreadElfCore.cpp | 14 +- 12 files changed, 182 insertions(+), 282 deletions(-) diff --git a/lldb/source/Plugins/Process/FreeBSD/FreeBSDThread.cpp b/lldb/source/Plugins/Process/FreeBSD/FreeBSDThread.cpp index 48dbddb86cca3..3accc9cef6edb 100644 --- a/lldb/source/Plugins/Process/FreeBSD/FreeBSDThread.cpp +++ b/lldb/source/Plugins/Process/FreeBSD/FreeBSDThread.cpp @@ -164,9 +164,7 @@ lldb::RegisterContextSP FreeBSDThread::GetRegisterContext() { assert(target_arch.GetTriple().getOS() == llvm::Triple::FreeBSD); switch (target_arch.GetMachine()) { case llvm::Triple::aarch64: - break; case llvm::Triple::arm: - reg_interface = new RegisterInfoPOSIX_arm(target_arch); break; case llvm::Triple::ppc: #ifndef __powerpc64__ @@ -200,7 +198,8 @@ lldb::RegisterContextSP FreeBSDThread::GetRegisterContext() { } case llvm::Triple::arm: { RegisterContextPOSIXProcessMonitor_arm *reg_ctx = - new RegisterContextPOSIXProcessMonitor_arm(*this, 0, reg_interface); + new RegisterContextPOSIXProcessMonitor_arm( + *this, std::make_unique(target_arch)); m_posix_thread = reg_ctx; m_reg_context_sp.reset(reg_ctx); break; diff --git a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp 
index 4216f68faf5c6..2f4d613f767af 100644 --- a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp +++ b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.cpp @@ -21,9 +21,9 @@ using namespace lldb; #define REG_CONTEXT_SIZE (GetGPRSize()) RegisterContextPOSIXProcessMonitor_arm::RegisterContextPOSIXProcessMonitor_arm( - Thread &thread, uint32_t concrete_frame_idx, - lldb_private::RegisterInfoInterface *register_info) - : RegisterContextPOSIX_arm(thread, concrete_frame_idx, register_info) {} + lldb_private::Thread &thread, + std::unique_ptr register_info) + : RegisterContextPOSIX_arm(thread, std::move(register_info)) {} ProcessMonitor &RegisterContextPOSIXProcessMonitor_arm::GetMonitor() { ProcessSP base = CalculateProcess(); diff --git a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h index b376967df99ce..12e1f19d32fac 100644 --- a/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h +++ b/lldb/source/Plugins/Process/FreeBSD/RegisterContextPOSIXProcessMonitor_arm.h @@ -16,8 +16,8 @@ class RegisterContextPOSIXProcessMonitor_arm : public RegisterContextPOSIX_arm, public POSIXBreakpointProtocol { public: RegisterContextPOSIXProcessMonitor_arm( - lldb_private::Thread &thread, uint32_t concrete_frame_idx, - lldb_private::RegisterInfoInterface *register_info); + lldb_private::Thread &thread, + std::unique_ptr register_info); protected: bool ReadGPR(); diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp index a83491e6d8987..04714ec3c3749 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp @@ -43,55 +43,6 @@ using namespace lldb; using namespace lldb_private; using namespace 
lldb_private::process_linux; -// arm general purpose registers. -static const uint32_t g_gpr_regnums_arm[] = { - gpr_r0_arm, gpr_r1_arm, gpr_r2_arm, gpr_r3_arm, gpr_r4_arm, - gpr_r5_arm, gpr_r6_arm, gpr_r7_arm, gpr_r8_arm, gpr_r9_arm, - gpr_r10_arm, gpr_r11_arm, gpr_r12_arm, gpr_sp_arm, gpr_lr_arm, - gpr_pc_arm, gpr_cpsr_arm, - LLDB_INVALID_REGNUM // register sets need to end with this flag -}; -static_assert(((sizeof g_gpr_regnums_arm / sizeof g_gpr_regnums_arm[0]) - 1) == - k_num_gpr_registers_arm, - "g_gpr_regnums_arm has wrong number of register infos"); - -// arm floating point registers. -static const uint32_t g_fpu_regnums_arm[] = { - fpu_s0_arm, fpu_s1_arm, fpu_s2_arm, fpu_s3_arm, fpu_s4_arm, - fpu_s5_arm, fpu_s6_arm, fpu_s7_arm, fpu_s8_arm, fpu_s9_arm, - fpu_s10_arm, fpu_s11_arm, fpu_s12_arm, fpu_s13_arm, fpu_s14_arm, - fpu_s15_arm, fpu_s16_arm, fpu_s17_arm, fpu_s18_arm, fpu_s19_arm, - fpu_s20_arm, fpu_s21_arm, fpu_s22_arm, fpu_s23_arm, fpu_s24_arm, - fpu_s25_arm, fpu_s26_arm, fpu_s27_arm, fpu_s28_arm, fpu_s29_arm, - fpu_s30_arm, fpu_s31_arm, fpu_fpscr_arm, fpu_d0_arm, fpu_d1_arm, - fpu_d2_arm, fpu_d3_arm, fpu_d4_arm, fpu_d5_arm, fpu_d6_arm, - fpu_d7_arm, fpu_d8_arm, fpu_d9_arm, fpu_d10_arm, fpu_d11_arm, - fpu_d12_arm, fpu_d13_arm, fpu_d14_arm, fpu_d15_arm, fpu_d16_arm, - fpu_d17_arm, fpu_d18_arm, fpu_d19_arm, fpu_d20_arm, fpu_d21_arm, - fpu_d22_arm, fpu_d23_arm, fpu_d24_arm, fpu_d25_arm, fpu_d26_arm, - fpu_d27_arm, fpu_d28_arm, fpu_d29_arm, fpu_d30_arm, fpu_d31_arm, - fpu_q0_arm, fpu_q1_arm, fpu_q2_arm, fpu_q3_arm, fpu_q4_arm, - fpu_q5_arm, fpu_q6_arm, fpu_q7_arm, fpu_q8_arm, fpu_q9_arm, - fpu_q10_arm, fpu_q11_arm, fpu_q12_arm, fpu_q13_arm, fpu_q14_arm, - fpu_q15_arm, - LLDB_INVALID_REGNUM // register sets need to end with this flag -}; -static_assert(((sizeof g_fpu_regnums_arm / sizeof g_fpu_regnums_arm[0]) - 1) == - k_num_fpr_registers_arm, - "g_fpu_regnums_arm has wrong number of register infos"); - -namespace { -// Number of register sets provided by 
this context. -enum { k_num_register_sets = 2 }; -} - -// Register sets for arm. -static const RegisterSet g_reg_sets_arm[k_num_register_sets] = { - {"General Purpose Registers", "gpr", k_num_gpr_registers_arm, - g_gpr_regnums_arm}, - {"Floating Point Registers", "fpu", k_num_fpr_registers_arm, - g_fpu_regnums_arm}}; - #if defined(__arm__) std::unique_ptr @@ -107,22 +58,7 @@ NativeRegisterContextLinux_arm::NativeRegisterContextLinux_arm( const ArchSpec &target_arch, NativeThreadProtocol &native_thread) : NativeRegisterContextLinux(native_thread, new RegisterInfoPOSIX_arm(target_arch)) { - switch (target_arch.GetMachine()) { - case llvm::Triple::arm: - m_reg_info.num_registers = k_num_registers_arm; - m_reg_info.num_gpr_registers = k_num_gpr_registers_arm; - m_reg_info.num_fpr_registers = k_num_fpr_registers_arm; - m_reg_info.last_gpr = k_last_gpr_arm; - m_reg_info.first_fpr = k_first_fpr_arm; - m_reg_info.last_fpr = k_last_fpr_arm; - m_reg_info.first_fpr_v = fpu_s0_arm; - m_reg_info.last_fpr_v = fpu_s31_arm; - m_reg_info.gpr_flags = gpr_cpsr_arm; - break; - default: - assert(false && "Unhandled target architecture."); - break; - } + assert(target_arch.GetMachine() == llvm::Triple::arm); ::memset(&m_fpr, 0, sizeof(m_fpr)); ::memset(&m_gpr_arm, 0, sizeof(m_gpr_arm)); @@ -135,23 +71,24 @@ NativeRegisterContextLinux_arm::NativeRegisterContextLinux_arm( m_refresh_hwdebug_info = true; } +RegisterInfoPOSIX_arm &NativeRegisterContextLinux_arm::GetRegisterInfo() const { + return static_cast(*m_register_info_interface_up); +} + uint32_t NativeRegisterContextLinux_arm::GetRegisterSetCount() const { - return k_num_register_sets; + return GetRegisterInfo().GetRegisterSetCount(); } uint32_t NativeRegisterContextLinux_arm::GetUserRegisterCount() const { uint32_t count = 0; - for (uint32_t set_index = 0; set_index < k_num_register_sets; ++set_index) - count += g_reg_sets_arm[set_index].num_registers; + for (uint32_t set_index = 0; set_index < GetRegisterSetCount(); ++set_index) + 
count += GetRegisterSet(set_index)->num_registers; return count; } const RegisterSet * NativeRegisterContextLinux_arm::GetRegisterSet(uint32_t set_index) const { - if (set_index < k_num_register_sets) - return &g_reg_sets_arm[set_index]; - - return nullptr; + return GetRegisterInfo().GetRegisterSet(set_index); } Status @@ -336,11 +273,17 @@ Status NativeRegisterContextLinux_arm::WriteAllRegisterValues( } bool NativeRegisterContextLinux_arm::IsGPR(unsigned reg) const { - return reg <= m_reg_info.last_gpr; // GPR's come first. + if (GetRegisterInfo().GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_arm::GPRegSet) + return true; + return false; } bool NativeRegisterContextLinux_arm::IsFPR(unsigned reg) const { - return (m_reg_info.first_fpr <= reg && reg <= m_reg_info.last_fpr); + if (GetRegisterInfo().GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_arm::FPRegSet) + return true; + return false; } uint32_t NativeRegisterContextLinux_arm::NumSupportedHardwareBreakpoints() { @@ -851,8 +794,7 @@ Status NativeRegisterContextLinux_arm::WriteHardwareDebugRegs(int hwbType, uint32_t NativeRegisterContextLinux_arm::CalculateFprOffset( const RegisterInfo *reg_info) const { - return reg_info->byte_offset - - GetRegisterInfoAtIndex(m_reg_info.first_fpr)->byte_offset; + return reg_info->byte_offset - GetGPRSize(); } Status NativeRegisterContextLinux_arm::DoReadRegisterValue( diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.h b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.h index 6bd4db573c619..4ce3797e7bdea 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.h +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.h @@ -12,6 +12,7 @@ #define lldb_NativeRegisterContextLinux_arm_h #include "Plugins/Process/Linux/NativeRegisterContextLinux.h" +#include "Plugins/Process/Utility/RegisterInfoPOSIX_arm.h" #include "Plugins/Process/Utility/lldb-arm-register-enums.h" namespace 
lldb_private { @@ -98,37 +99,8 @@ class NativeRegisterContextLinux_arm : public NativeRegisterContextLinux { size_t GetFPRSize() override { return sizeof(m_fpr); } private: - struct RegInfo { - uint32_t num_registers; - uint32_t num_gpr_registers; - uint32_t num_fpr_registers; - - uint32_t last_gpr; - uint32_t first_fpr; - uint32_t last_fpr; - - uint32_t first_fpr_v; - uint32_t last_fpr_v; - - uint32_t gpr_flags; - }; - - struct QReg { - uint8_t bytes[16]; - }; - - struct FPU { - union { - uint32_t s[32]; - uint64_t d[32]; - QReg q[16]; // the 128-bit NEON registers - } floats; - uint32_t fpscr; - }; - uint32_t m_gpr_arm[k_num_gpr_registers_arm]; - RegInfo m_reg_info; - FPU m_fpr; + RegisterInfoPOSIX_arm::FPU m_fpr; // Debug register info for hardware breakpoints and watchpoints management. struct DREG { @@ -156,6 +128,8 @@ class NativeRegisterContextLinux_arm : public NativeRegisterContextLinux { Status WriteHardwareDebugRegs(int hwbType, int hwb_index); uint32_t CalculateFprOffset(const RegisterInfo *reg_info) const; + + RegisterInfoPOSIX_arm &GetRegisterInfo() const; }; } // namespace process_linux diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp index 617893b6b3b04..97a760396ba92 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp @@ -25,88 +25,25 @@ using namespace lldb; using namespace lldb_private; -// arm general purpose registers. 
-const uint32_t g_gpr_regnums_arm[] = { - gpr_r0_arm, gpr_r1_arm, gpr_r2_arm, gpr_r3_arm, gpr_r4_arm, - gpr_r5_arm, gpr_r6_arm, gpr_r7_arm, gpr_r8_arm, gpr_r9_arm, - gpr_r10_arm, gpr_r11_arm, gpr_r12_arm, gpr_sp_arm, gpr_lr_arm, - gpr_pc_arm, gpr_cpsr_arm, - LLDB_INVALID_REGNUM // register sets need to end with this flag - -}; -static_assert(((sizeof g_gpr_regnums_arm / sizeof g_gpr_regnums_arm[0]) - 1) == - k_num_gpr_registers_arm, - "g_gpr_regnums_arm has wrong number of register infos"); - -// arm floating point registers. -static const uint32_t g_fpu_regnums_arm[] = { - fpu_s0_arm, fpu_s1_arm, fpu_s2_arm, fpu_s3_arm, fpu_s4_arm, - fpu_s5_arm, fpu_s6_arm, fpu_s7_arm, fpu_s8_arm, fpu_s9_arm, - fpu_s10_arm, fpu_s11_arm, fpu_s12_arm, fpu_s13_arm, fpu_s14_arm, - fpu_s15_arm, fpu_s16_arm, fpu_s17_arm, fpu_s18_arm, fpu_s19_arm, - fpu_s20_arm, fpu_s21_arm, fpu_s22_arm, fpu_s23_arm, fpu_s24_arm, - fpu_s25_arm, fpu_s26_arm, fpu_s27_arm, fpu_s28_arm, fpu_s29_arm, - fpu_s30_arm, fpu_s31_arm, fpu_fpscr_arm, fpu_d0_arm, fpu_d1_arm, - fpu_d2_arm, fpu_d3_arm, fpu_d4_arm, fpu_d5_arm, fpu_d6_arm, - fpu_d7_arm, fpu_d8_arm, fpu_d9_arm, fpu_d10_arm, fpu_d11_arm, - fpu_d12_arm, fpu_d13_arm, fpu_d14_arm, fpu_d15_arm, fpu_d16_arm, - fpu_d17_arm, fpu_d18_arm, fpu_d19_arm, fpu_d20_arm, fpu_d21_arm, - fpu_d22_arm, fpu_d23_arm, fpu_d24_arm, fpu_d25_arm, fpu_d26_arm, - fpu_d27_arm, fpu_d28_arm, fpu_d29_arm, fpu_d30_arm, fpu_d31_arm, - fpu_q0_arm, fpu_q1_arm, fpu_q2_arm, fpu_q3_arm, fpu_q4_arm, - fpu_q5_arm, fpu_q6_arm, fpu_q7_arm, fpu_q8_arm, fpu_q9_arm, - fpu_q10_arm, fpu_q11_arm, fpu_q12_arm, fpu_q13_arm, fpu_q14_arm, - fpu_q15_arm, - LLDB_INVALID_REGNUM // register sets need to end with this flag - -}; -static_assert(((sizeof g_fpu_regnums_arm / sizeof g_fpu_regnums_arm[0]) - 1) == - k_num_fpr_registers_arm, - "g_fpu_regnums_arm has wrong number of register infos"); - -// Number of register sets provided by this context. -enum { k_num_register_sets = 2 }; - -// Register sets for arm. 
-static const lldb_private::RegisterSet g_reg_sets_arm[k_num_register_sets] = { - {"General Purpose Registers", "gpr", k_num_gpr_registers_arm, - g_gpr_regnums_arm}, - {"Floating Point Registers", "fpu", k_num_fpr_registers_arm, - g_fpu_regnums_arm}}; - bool RegisterContextPOSIX_arm::IsGPR(unsigned reg) { - return reg <= m_reg_info.last_gpr; // GPR's come first. + if (m_register_info_up->GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_arm::GPRegSet) + return true; + return false; } bool RegisterContextPOSIX_arm::IsFPR(unsigned reg) { - return (m_reg_info.first_fpr <= reg && reg <= m_reg_info.last_fpr); + if (m_register_info_up->GetRegisterSetFromRegisterIndex(reg) == + RegisterInfoPOSIX_arm::FPRegSet) + return true; + return false; } RegisterContextPOSIX_arm::RegisterContextPOSIX_arm( - lldb_private::Thread &thread, uint32_t concrete_frame_idx, - lldb_private::RegisterInfoInterface *register_info) - : lldb_private::RegisterContext(thread, concrete_frame_idx) { - m_register_info_up.reset(register_info); - - switch (register_info->m_target_arch.GetMachine()) { - case llvm::Triple::arm: - m_reg_info.num_registers = k_num_registers_arm; - m_reg_info.num_gpr_registers = k_num_gpr_registers_arm; - m_reg_info.num_fpr_registers = k_num_fpr_registers_arm; - m_reg_info.last_gpr = k_last_gpr_arm; - m_reg_info.first_fpr = k_first_fpr_arm; - m_reg_info.last_fpr = k_last_fpr_arm; - m_reg_info.first_fpr_v = fpu_s0_arm; - m_reg_info.last_fpr_v = fpu_s31_arm; - m_reg_info.gpr_flags = gpr_cpsr_arm; - break; - default: - assert(false && "Unhandled target architecture."); - break; - } - - ::memset(&m_fpr, 0, sizeof m_fpr); -} + lldb_private::Thread &thread, + std::unique_ptr register_info) + : lldb_private::RegisterContext(thread, 0), + m_register_info_up(std::move(register_info)) {} RegisterContextPOSIX_arm::~RegisterContextPOSIX_arm() {} @@ -115,19 +52,15 @@ void RegisterContextPOSIX_arm::Invalidate() {} void RegisterContextPOSIX_arm::InvalidateAllRegisters() {} unsigned 
RegisterContextPOSIX_arm::GetRegisterOffset(unsigned reg) { - assert(reg < m_reg_info.num_registers && "Invalid register number."); - return GetRegisterInfo()[reg].byte_offset; + return m_register_info_up->GetRegisterInfo()[reg].byte_offset; } unsigned RegisterContextPOSIX_arm::GetRegisterSize(unsigned reg) { - assert(reg < m_reg_info.num_registers && "Invalid register number."); - return GetRegisterInfo()[reg].byte_size; + return m_register_info_up->GetRegisterInfo()[reg].byte_size; } size_t RegisterContextPOSIX_arm::GetRegisterCount() { - size_t num_registers = - m_reg_info.num_gpr_registers + m_reg_info.num_fpr_registers; - return num_registers; + return m_register_info_up->GetRegisterCount(); } size_t RegisterContextPOSIX_arm::GetGPRSize() { @@ -143,41 +76,23 @@ const lldb_private::RegisterInfo *RegisterContextPOSIX_arm::GetRegisterInfo() { const lldb_private::RegisterInfo * RegisterContextPOSIX_arm::GetRegisterInfoAtIndex(size_t reg) { - if (reg < m_reg_info.num_registers) + if (reg < GetRegisterCount()) return &GetRegisterInfo()[reg]; - else - return nullptr; + + return nullptr; } size_t RegisterContextPOSIX_arm::GetRegisterSetCount() { - size_t sets = 0; - for (size_t set = 0; set < k_num_register_sets; ++set) { - if (IsRegisterSetAvailable(set)) - ++sets; - } - - return sets; + return m_register_info_up->GetRegisterSetCount(); } const lldb_private::RegisterSet * RegisterContextPOSIX_arm::GetRegisterSet(size_t set) { - if (IsRegisterSetAvailable(set)) { - switch (m_register_info_up->m_target_arch.GetMachine()) { - case llvm::Triple::arm: - return &g_reg_sets_arm[set]; - default: - assert(false && "Unhandled target architecture."); - return nullptr; - } - } - return nullptr; + return m_register_info_up->GetRegisterSet(set); } const char *RegisterContextPOSIX_arm::GetRegisterName(unsigned reg) { - assert(reg < m_reg_info.num_registers && "Invalid register offset."); - return GetRegisterInfo()[reg].name; -} - -bool 
RegisterContextPOSIX_arm::IsRegisterSetAvailable(size_t set_index) { - return set_index < k_num_register_sets; + if (reg < GetRegisterCount()) + return GetRegisterInfo()[reg].name; + return nullptr; } diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.h b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.h index d6967f05ed487..6e7d47d5e50a6 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.h +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.h @@ -10,7 +10,7 @@ #define LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERCONTEXTPOSIX_ARM_H #include "RegisterInfoInterface.h" -#include "lldb-arm-register-enums.h" +#include "RegisterInfoPOSIX_arm.h" #include "lldb/Target/RegisterContext.h" #include "lldb/Utility/Log.h" @@ -18,9 +18,9 @@ class ProcessMonitor; class RegisterContextPOSIX_arm : public lldb_private::RegisterContext { public: - RegisterContextPOSIX_arm(lldb_private::Thread &thread, - uint32_t concrete_frame_idx, - lldb_private::RegisterInfoInterface *register_info); + RegisterContextPOSIX_arm( + lldb_private::Thread &thread, + std::unique_ptr register_info); ~RegisterContextPOSIX_arm() override; @@ -45,46 +45,7 @@ class RegisterContextPOSIX_arm : public lldb_private::RegisterContext { const char *GetRegisterName(unsigned reg); protected: - struct RegInfo { - uint32_t num_registers; - uint32_t num_gpr_registers; - uint32_t num_fpr_registers; - - uint32_t last_gpr; - uint32_t first_fpr; - uint32_t last_fpr; - - uint32_t first_fpr_v; - uint32_t last_fpr_v; - - uint32_t gpr_flags; - }; - - struct QReg { - uint8_t bytes[16]; - }; - - struct FPU { - union { - uint32_t s[32]; - uint64_t d[32]; - QReg q[16]; // the 128-bit NEON registers - } floats; - uint32_t fpscr; - }; - - uint32_t m_gpr_arm[lldb_private::k_num_gpr_registers_arm]; // 32-bit general - // purpose - // registers. 
- RegInfo m_reg_info; - struct RegisterContextPOSIX_arm::FPU - m_fpr; // floating-point registers including extended register sets. - std::unique_ptr - m_register_info_up; // Register Info Interface (FreeBSD or Linux) - - // Determines if an extended register set is supported on the processor - // running the inferior process. - virtual bool IsRegisterSetAvailable(size_t set_index); + std::unique_ptr m_register_info_up; virtual const lldb_private::RegisterInfo *GetRegisterInfo(); @@ -92,6 +53,8 @@ class RegisterContextPOSIX_arm : public lldb_private::RegisterContext { bool IsFPR(unsigned reg); + size_t GetFPUSize() { return sizeof(RegisterInfoPOSIX_arm::FPU); } + virtual bool ReadGPR() = 0; virtual bool ReadFPR() = 0; virtual bool WriteGPR() = 0; diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp index 8fc4d5282b06a..17b96f944cda2 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp @@ -71,9 +71,87 @@ GetRegisterInfoCount(const lldb_private::ArchSpec &target_arch) { } } +// Number of register sets provided by this context. +enum { + k_num_gpr_registers = gpr_cpsr - gpr_r0 + 1, + k_num_fpr_registers = fpu_q15 - fpu_s0 + 1, + k_num_register_sets = 2 +}; + +// arm general purpose registers. +static const uint32_t g_gpr_regnums_arm[] = { + gpr_r0, gpr_r1, + gpr_r2, gpr_r3, + gpr_r4, gpr_r5, + gpr_r6, gpr_r7, + gpr_r8, gpr_r9, + gpr_r10, gpr_r11, + gpr_r12, gpr_sp, + gpr_lr, gpr_pc, + gpr_cpsr, LLDB_INVALID_REGNUM // register sets need to end with this flag +}; +static_assert(((sizeof g_gpr_regnums_arm / sizeof g_gpr_regnums_arm[0]) - 1) == + k_num_gpr_registers, + "g_gpr_regnums_arm has wrong number of register infos"); + +// arm floating point registers. 
+static const uint32_t g_fpu_regnums_arm[] = { + fpu_s0, fpu_s1, + fpu_s2, fpu_s3, + fpu_s4, fpu_s5, + fpu_s6, fpu_s7, + fpu_s8, fpu_s9, + fpu_s10, fpu_s11, + fpu_s12, fpu_s13, + fpu_s14, fpu_s15, + fpu_s16, fpu_s17, + fpu_s18, fpu_s19, + fpu_s20, fpu_s21, + fpu_s22, fpu_s23, + fpu_s24, fpu_s25, + fpu_s26, fpu_s27, + fpu_s28, fpu_s29, + fpu_s30, fpu_s31, + fpu_fpscr, fpu_d0, + fpu_d1, fpu_d2, + fpu_d3, fpu_d4, + fpu_d5, fpu_d6, + fpu_d7, fpu_d8, + fpu_d9, fpu_d10, + fpu_d11, fpu_d12, + fpu_d13, fpu_d14, + fpu_d15, fpu_d16, + fpu_d17, fpu_d18, + fpu_d19, fpu_d20, + fpu_d21, fpu_d22, + fpu_d23, fpu_d24, + fpu_d25, fpu_d26, + fpu_d27, fpu_d28, + fpu_d29, fpu_d30, + fpu_d31, fpu_q0, + fpu_q1, fpu_q2, + fpu_q3, fpu_q4, + fpu_q5, fpu_q6, + fpu_q7, fpu_q8, + fpu_q9, fpu_q10, + fpu_q11, fpu_q12, + fpu_q13, fpu_q14, + fpu_q15, LLDB_INVALID_REGNUM // register sets need to end with this flag +}; +static_assert(((sizeof g_fpu_regnums_arm / sizeof g_fpu_regnums_arm[0]) - 1) == + k_num_fpr_registers, + "g_fpu_regnums_arm has wrong number of register infos"); + +// Register sets for arm. 
+static const RegisterSet g_reg_sets_arm[k_num_register_sets] = { + {"General Purpose Registers", "gpr", k_num_gpr_registers, + g_gpr_regnums_arm}, + {"Floating Point Registers", "fpu", k_num_fpr_registers, + g_fpu_regnums_arm}}; + RegisterInfoPOSIX_arm::RegisterInfoPOSIX_arm( const lldb_private::ArchSpec &target_arch) - : lldb_private::RegisterInfoInterface(target_arch), + : lldb_private::RegisterInfoAndSetInterface(target_arch), m_register_info_p(GetRegisterInfoPtr(target_arch)), m_register_info_count(GetRegisterInfoCount(target_arch)) {} @@ -81,11 +159,35 @@ size_t RegisterInfoPOSIX_arm::GetGPRSize() const { return sizeof(struct RegisterInfoPOSIX_arm::GPR); } +size_t RegisterInfoPOSIX_arm::GetFPRSize() const { + return sizeof(struct RegisterInfoPOSIX_arm::FPU); +} + const lldb_private::RegisterInfo * RegisterInfoPOSIX_arm::GetRegisterInfo() const { return m_register_info_p; } +size_t RegisterInfoPOSIX_arm::GetRegisterSetCount() const { + return k_num_register_sets; +} + +size_t RegisterInfoPOSIX_arm::GetRegisterSetFromRegisterIndex( + uint32_t reg_index) const { + if (reg_index <= gpr_cpsr) + return GPRegSet; + if (reg_index <= fpu_q15) + return FPRegSet; + return LLDB_INVALID_REGNUM; +} + +const lldb_private::RegisterSet * +RegisterInfoPOSIX_arm::GetRegisterSet(size_t set_index) const { + if (set_index < GetRegisterSetCount()) + return &g_reg_sets_arm[set_index]; + return nullptr; +} + uint32_t RegisterInfoPOSIX_arm::GetRegisterCount() const { return m_register_info_count; } diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.h index 1cf896e3decfd..db155d757ca8c 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.h +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.h @@ -9,12 +9,14 @@ #ifndef LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERINFOPOSIX_ARM_H #define LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_REGISTERINFOPOSIX_ARM_H -#include 
"RegisterInfoInterface.h" +#include "RegisterInfoAndSetInterface.h" #include "lldb/Target/RegisterContext.h" #include "lldb/lldb-private.h" -class RegisterInfoPOSIX_arm : public lldb_private::RegisterInfoInterface { +class RegisterInfoPOSIX_arm : public lldb_private::RegisterInfoAndSetInterface { public: + enum { GPRegSet = 0, FPRegSet}; + struct GPR { uint32_t r[16]; // R0-R15 uint32_t cpsr; // CPSR @@ -49,10 +51,19 @@ class RegisterInfoPOSIX_arm : public lldb_private::RegisterInfoInterface { size_t GetGPRSize() const override; + size_t GetFPRSize() const override; + const lldb_private::RegisterInfo *GetRegisterInfo() const override; uint32_t GetRegisterCount() const override; + const lldb_private::RegisterSet * + GetRegisterSet(size_t reg_set) const override; + + size_t GetRegisterSetCount() const override; + + size_t GetRegisterSetFromRegisterIndex(uint32_t reg_index) const override; + private: const lldb_private::RegisterInfo *m_register_info_p; uint32_t m_register_info_count; diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp index b76f26a584c04..2f71f175a00d9 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp @@ -16,9 +16,9 @@ using namespace lldb_private; RegisterContextCorePOSIX_arm::RegisterContextCorePOSIX_arm( - Thread &thread, RegisterInfoInterface *register_info, + Thread &thread, std::unique_ptr register_info, const DataExtractor &gpregset, llvm::ArrayRef notes) - : RegisterContextPOSIX_arm(thread, 0, register_info) { + : RegisterContextPOSIX_arm(thread, std::move(register_info)) { m_gpr_buffer = std::make_shared(gpregset.GetDataStart(), gpregset.GetByteSize()); m_gpr.SetData(m_gpr_buffer); diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.h b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.h 
index f9ec08ed35fcf..de343f9001e06 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.h +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.h @@ -18,7 +18,7 @@ class RegisterContextCorePOSIX_arm : public RegisterContextPOSIX_arm { public: RegisterContextCorePOSIX_arm( lldb_private::Thread &thread, - lldb_private::RegisterInfoInterface *register_info, + std::unique_ptr register_info, const lldb_private::DataExtractor &gpregset, llvm::ArrayRef notes); diff --git a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp index 6b5acfa4bc1bb..76c0c2843e6df 100644 --- a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp @@ -82,9 +82,7 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { case llvm::Triple::FreeBSD: { switch (arch.GetMachine()) { case llvm::Triple::aarch64: - break; case llvm::Triple::arm: - reg_interface = new RegisterInfoPOSIX_arm(arch); break; case llvm::Triple::ppc: reg_interface = new RegisterContextFreeBSD_powerpc32(arch); @@ -122,9 +120,6 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { case llvm::Triple::Linux: { switch (arch.GetMachine()) { - case llvm::Triple::arm: - reg_interface = new RegisterInfoPOSIX_arm(arch); - break; case llvm::Triple::aarch64: break; case llvm::Triple::mipsel: @@ -157,9 +152,6 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { switch (arch.GetMachine()) { case llvm::Triple::aarch64: break; - case llvm::Triple::arm: - reg_interface = new RegisterInfoPOSIX_arm(arch); - break; case llvm::Triple::x86: reg_interface = new RegisterContextOpenBSD_i386(arch); break; @@ -176,7 +168,8 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { break; } - if (!reg_interface && arch.GetMachine() != llvm::Triple::aarch64) { + if (!reg_interface && arch.GetMachine() != llvm::Triple::aarch64 && + 
arch.GetMachine() != llvm::Triple::arm) { LLDB_LOGF(log, "elf-core::%s:: Architecture(%d) or OS(%d) not supported", __FUNCTION__, arch.GetMachine(), arch.GetTriple().getOS()); assert(false && "Architecture or OS not supported"); @@ -190,7 +183,8 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { break; case llvm::Triple::arm: m_thread_reg_ctx_sp = std::make_shared( - *this, reg_interface, m_gpregset_data, m_notes); + *this, std::make_unique(arch), m_gpregset_data, + m_notes); break; case llvm::Triple::mipsel: case llvm::Triple::mips: From 7403e3ee324018c79d0e55532240952dbaa4fcbe Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Sun, 6 Sep 2020 12:16:40 -0700 Subject: [PATCH 299/465] Extend PyConcreteType to support intermediate base classes. * Resolves todos from D87091. * Also modifies PyConcreteAttribute to follow suite (should be useful for ElementsAttr and friends). * Adds a test to ensure that the ShapedType base class functions as expected. Differential Revision: https://reviews.llvm.org/D87208 --- mlir/lib/Bindings/Python/IRModules.cpp | 306 ++++++++++++------------- mlir/test/Bindings/Python/ir_types.py | 29 ++- 2 files changed, 170 insertions(+), 165 deletions(-) diff --git a/mlir/lib/Bindings/Python/IRModules.cpp b/mlir/lib/Bindings/Python/IRModules.cpp index 149e231aed0b4..bf1235a77d08c 100644 --- a/mlir/lib/Bindings/Python/IRModules.cpp +++ b/mlir/lib/Bindings/Python/IRModules.cpp @@ -221,34 +221,37 @@ namespace { /// CRTP base classes for Python attributes that subclass Attribute and should /// be castable from it (i.e. via something like StringAttr(attr)). -template -class PyConcreteAttribute : public PyAttribute { +/// By default, attribute class hierarchies are one level deep (i.e. a +/// concrete attribute class extends PyAttribute); however, intermediate +/// python-visible base classes can be modeled by specifying a BaseTy. 
+template +class PyConcreteAttribute : public BaseTy { public: // Derived classes must define statics for: // IsAFunctionTy isaFunction // const char *pyClassName - using ClassTy = py::class_; + using ClassTy = py::class_; using IsAFunctionTy = int (*)(MlirAttribute); PyConcreteAttribute() = default; - PyConcreteAttribute(MlirAttribute attr) : PyAttribute(attr) {} + PyConcreteAttribute(MlirAttribute attr) : BaseTy(attr) {} PyConcreteAttribute(PyAttribute &orig) : PyConcreteAttribute(castFrom(orig)) {} static MlirAttribute castFrom(PyAttribute &orig) { - if (!T::isaFunction(orig.attr)) { + if (!DerivedTy::isaFunction(orig.attr)) { auto origRepr = py::repr(py::cast(orig)).cast(); throw SetPyError(PyExc_ValueError, llvm::Twine("Cannot cast attribute to ") + - T::pyClassName + " (from " + origRepr + ")"); + DerivedTy::pyClassName + " (from " + origRepr + ")"); } return orig.attr; } static void bind(py::module &m) { - auto cls = ClassTy(m, T::pyClassName); + auto cls = ClassTy(m, DerivedTy::pyClassName); cls.def(py::init(), py::keep_alive<0, 1>()); - T::bindDerived(cls); + DerivedTy::bindDerived(cls); } /// Implemented by derived classes to add methods to the Python subclass. @@ -301,33 +304,36 @@ namespace { /// CRTP base classes for Python types that subclass Type and should be /// castable from it (i.e. via something like IntegerType(t)). -template -class PyConcreteType : public PyType { +/// By default, type class hierarchies are one level deep (i.e. a +/// concrete type class extends PyType); however, intermediate python-visible +/// base classes can be modeled by specifying a BaseTy. 
+template +class PyConcreteType : public BaseTy { public: // Derived classes must define statics for: // IsAFunctionTy isaFunction // const char *pyClassName - using ClassTy = py::class_; + using ClassTy = py::class_; using IsAFunctionTy = int (*)(MlirType); PyConcreteType() = default; - PyConcreteType(MlirType t) : PyType(t) {} - PyConcreteType(PyType &orig) : PyType(castFrom(orig)) {} + PyConcreteType(MlirType t) : BaseTy(t) {} + PyConcreteType(PyType &orig) : PyConcreteType(castFrom(orig)) {} static MlirType castFrom(PyType &orig) { - if (!T::isaFunction(orig.type)) { + if (!DerivedTy::isaFunction(orig.type)) { auto origRepr = py::repr(py::cast(orig)).cast(); throw SetPyError(PyExc_ValueError, llvm::Twine("Cannot cast type to ") + - T::pyClassName + " (from " + - origRepr + ")"); + DerivedTy::pyClassName + + " (from " + origRepr + ")"); } return orig.type; } static void bind(py::module &m) { - auto cls = ClassTy(m, T::pyClassName); + auto cls = ClassTy(m, DerivedTy::pyClassName); cls.def(py::init(), py::keep_alive<0, 1>()); - T::bindDerived(cls); + DerivedTy::bindDerived(cls); } /// Implemented by derived classes to add methods to the Python subclass. @@ -590,142 +596,130 @@ class PyShapedType : public PyConcreteType { }; /// Vector Type subclass - VectorType. -class PyVectorType : public PyShapedType { +class PyVectorType : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAVector; static constexpr const char *pyClassName = "VectorType"; - using PyShapedType::PyShapedType; - // TODO: Switch back to bindDerived by making the ClassTy modifiable by - // subclasses, exposing the ShapedType hierarchy. - static void bind(py::module &m) { - py::class_(m, pyClassName) - .def(py::init(), py::keep_alive<0, 1>()) - .def_static( - "get_vector", - // TODO: Make the location optional and create a default location. 
- [](std::vector shape, PyType &elementType, - PyLocation &loc) { - MlirType t = mlirVectorTypeGetChecked(shape.size(), shape.data(), - elementType.type, loc.loc); - // TODO: Rework error reporting once diagnostic engine is exposed - // in C API. - if (mlirTypeIsNull(t)) { - throw SetPyError( - PyExc_ValueError, - llvm::Twine("invalid '") + - py::repr(py::cast(elementType)).cast() + - "' and expected floating point or integer type."); - } - return PyVectorType(t); - }, - py::keep_alive<0, 2>(), "Create a vector type"); + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + c.def_static( + "get_vector", + // TODO: Make the location optional and create a default location. + [](std::vector shape, PyType &elementType, PyLocation &loc) { + MlirType t = mlirVectorTypeGetChecked(shape.size(), shape.data(), + elementType.type, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point or integer type."); + } + return PyVectorType(t); + }, + py::keep_alive<0, 2>(), "Create a vector type"); } }; /// Ranked Tensor Type subclass - RankedTensorType. -class PyRankedTensorType : public PyShapedType { +class PyRankedTensorType + : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsARankedTensor; static constexpr const char *pyClassName = "RankedTensorType"; - using PyShapedType::PyShapedType; - // TODO: Switch back to bindDerived by making the ClassTy modifiable by - // subclasses, exposing the ShapedType hierarchy. - static void bind(py::module &m) { - py::class_(m, pyClassName) - .def(py::init(), py::keep_alive<0, 1>()) - .def_static( - "get_ranked_tensor", - // TODO: Make the location optional and create a default location. 
- [](std::vector shape, PyType &elementType, - PyLocation &loc) { - MlirType t = mlirRankedTensorTypeGetChecked( - shape.size(), shape.data(), elementType.type, loc.loc); - // TODO: Rework error reporting once diagnostic engine is exposed - // in C API. - if (mlirTypeIsNull(t)) { - throw SetPyError( - PyExc_ValueError, - llvm::Twine("invalid '") + - py::repr(py::cast(elementType)).cast() + - "' and expected floating point, integer, vector or " - "complex " - "type."); - } - return PyRankedTensorType(t); - }, - py::keep_alive<0, 2>(), "Create a ranked tensor type"); + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + c.def_static( + "get_ranked_tensor", + // TODO: Make the location optional and create a default location. + [](std::vector shape, PyType &elementType, PyLocation &loc) { + MlirType t = mlirRankedTensorTypeGetChecked( + shape.size(), shape.data(), elementType.type, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyRankedTensorType(t); + }, + py::keep_alive<0, 2>(), "Create a ranked tensor type"); } }; /// Unranked Tensor Type subclass - UnrankedTensorType. -class PyUnrankedTensorType : public PyShapedType { +class PyUnrankedTensorType + : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAUnrankedTensor; static constexpr const char *pyClassName = "UnrankedTensorType"; - using PyShapedType::PyShapedType; - // TODO: Switch back to bindDerived by making the ClassTy modifiable by - // subclasses, exposing the ShapedType hierarchy. 
- static void bind(py::module &m) { - py::class_(m, pyClassName) - .def(py::init(), py::keep_alive<0, 1>()) - .def_static( - "get_unranked_tensor", - // TODO: Make the location optional and create a default location. - [](PyType &elementType, PyLocation &loc) { - MlirType t = - mlirUnrankedTensorTypeGetChecked(elementType.type, loc.loc); - // TODO: Rework error reporting once diagnostic engine is exposed - // in C API. - if (mlirTypeIsNull(t)) { - throw SetPyError( - PyExc_ValueError, - llvm::Twine("invalid '") + - py::repr(py::cast(elementType)).cast() + - "' and expected floating point, integer, vector or " - "complex " - "type."); - } - return PyUnrankedTensorType(t); - }, - py::keep_alive<0, 1>(), "Create a unranked tensor type"); + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + c.def_static( + "get_unranked_tensor", + // TODO: Make the location optional and create a default location. + [](PyType &elementType, PyLocation &loc) { + MlirType t = + mlirUnrankedTensorTypeGetChecked(elementType.type, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyUnrankedTensorType(t); + }, + py::keep_alive<0, 1>(), "Create a unranked tensor type"); } }; /// Ranked MemRef Type subclass - MemRefType. -class PyMemRefType : public PyShapedType { +class PyMemRefType : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsARankedTensor; static constexpr const char *pyClassName = "MemRefType"; - using PyShapedType::PyShapedType; - // TODO: Switch back to bindDerived by making the ClassTy modifiable by - // subclasses, exposing the ShapedType hierarchy. 
- static void bind(py::module &m) { - py::class_(m, pyClassName) - .def(py::init(), py::keep_alive<0, 1>()) - // TODO: Add mlirMemRefTypeGet and mlirMemRefTypeGetAffineMap binding - // once the affine map binding is completed. - .def_static( - "get_contiguous_memref", - // TODO: Make the location optional and create a default location. - [](PyType &elementType, std::vector shape, - unsigned memorySpace, PyLocation &loc) { - MlirType t = mlirMemRefTypeContiguousGetChecked( - elementType.type, shape.size(), shape.data(), memorySpace, - loc.loc); - // TODO: Rework error reporting once diagnostic engine is exposed - // in C API. - if (mlirTypeIsNull(t)) { - throw SetPyError( - PyExc_ValueError, - llvm::Twine("invalid '") + - py::repr(py::cast(elementType)).cast() + - "' and expected floating point, integer, vector or " - "complex " - "type."); - } - return PyMemRefType(t); - }, - py::keep_alive<0, 1>(), "Create a memref type") + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + // TODO: Add mlirMemRefTypeGet and mlirMemRefTypeGetAffineMap binding + // once the affine map binding is completed. + c.def_static( + "get_contiguous_memref", + // TODO: Make the location optional and create a default location. + [](PyType &elementType, std::vector shape, + unsigned memorySpace, PyLocation &loc) { + MlirType t = mlirMemRefTypeContiguousGetChecked( + elementType.type, shape.size(), shape.data(), memorySpace, + loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. 
+ if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyMemRefType(t); + }, + py::keep_alive<0, 1>(), "Create a memref type") .def_property_readonly( "num_affine_maps", [](PyMemRefType &self) -> intptr_t { @@ -743,36 +737,34 @@ class PyMemRefType : public PyShapedType { }; /// Unranked MemRef Type subclass - UnrankedMemRefType. -class PyUnrankedMemRefType : public PyShapedType { +class PyUnrankedMemRefType + : public PyConcreteType { public: static constexpr IsAFunctionTy isaFunction = mlirTypeIsAUnrankedMemRef; static constexpr const char *pyClassName = "UnrankedMemRefType"; - using PyShapedType::PyShapedType; - // TODO: Switch back to bindDerived by making the ClassTy modifiable by - // subclasses, exposing the ShapedType hierarchy. - static void bind(py::module &m) { - py::class_(m, pyClassName) - .def(py::init(), py::keep_alive<0, 1>()) - .def_static( - "get_unranked_memref", - // TODO: Make the location optional and create a default location. - [](PyType &elementType, unsigned memorySpace, PyLocation &loc) { - MlirType t = mlirUnrankedMemRefTypeGetChecked( - elementType.type, memorySpace, loc.loc); - // TODO: Rework error reporting once diagnostic engine is exposed - // in C API. - if (mlirTypeIsNull(t)) { - throw SetPyError( - PyExc_ValueError, - llvm::Twine("invalid '") + - py::repr(py::cast(elementType)).cast() + - "' and expected floating point, integer, vector or " - "complex " - "type."); - } - return PyUnrankedMemRefType(t); - }, - py::keep_alive<0, 1>(), "Create a unranked memref type") + using PyConcreteType::PyConcreteType; + + static void bindDerived(ClassTy &c) { + c.def_static( + "get_unranked_memref", + // TODO: Make the location optional and create a default location. 
+ [](PyType &elementType, unsigned memorySpace, PyLocation &loc) { + MlirType t = mlirUnrankedMemRefTypeGetChecked(elementType.type, + memorySpace, loc.loc); + // TODO: Rework error reporting once diagnostic engine is exposed + // in C API. + if (mlirTypeIsNull(t)) { + throw SetPyError( + PyExc_ValueError, + llvm::Twine("invalid '") + + py::repr(py::cast(elementType)).cast() + + "' and expected floating point, integer, vector or " + "complex " + "type."); + } + return PyUnrankedMemRefType(t); + }, + py::keep_alive<0, 1>(), "Create a unranked memref type") .def_property_readonly( "memory_space", [](PyUnrankedMemRefType &self) -> unsigned { diff --git a/mlir/test/Bindings/Python/ir_types.py b/mlir/test/Bindings/Python/ir_types.py index 00cd595843aa6..4710bee27e37e 100644 --- a/mlir/test/Bindings/Python/ir_types.py +++ b/mlir/test/Bindings/Python/ir_types.py @@ -177,11 +177,11 @@ def testComplexType(): run(testComplexType) -# CHECK-LABEL: TEST: testShapedType +# CHECK-LABEL: TEST: testConcreteShapedType # Shaped type is not a kind of standard types, it is the base class for # vectors, memrefs and tensors, so this test case uses an instance of vector -# to test the shaped type. -def testShapedType(): +# to test the shaped type. The class hierarchy is preserved on the python side. 
+def testConcreteShapedType(): ctx = mlir.ir.Context() vector = mlir.ir.VectorType(ctx.parse_type("vector<2x3xf32>")) # CHECK: element type: f32 @@ -196,12 +196,25 @@ def testShapedType(): print("whether the dim-th dimension is dynamic:", vector.is_dynamic_dim(0)) # CHECK: dim size: 3 print("dim size:", vector.get_dim_size(1)) - # CHECK: False - print(vector.is_dynamic_size(3)) - # CHECK: False - print(vector.is_dynamic_stride_or_offset(1)) + # CHECK: is_dynamic_size: False + print("is_dynamic_size:", vector.is_dynamic_size(3)) + # CHECK: is_dynamic_stride_or_offset: False + print("is_dynamic_stride_or_offset:", vector.is_dynamic_stride_or_offset(1)) + # CHECK: isinstance(ShapedType): True + print("isinstance(ShapedType):", isinstance(vector, mlir.ir.ShapedType)) + +run(testConcreteShapedType) + +# CHECK-LABEL: TEST: testAbstractShapedType +# Tests that ShapedType operates as an abstract base class of a concrete +# shaped type (using vector as an example). +def testAbstractShapedType(): + ctx = mlir.ir.Context() + vector = mlir.ir.ShapedType(ctx.parse_type("vector<2x3xf32>")) + # CHECK: element type: f32 + print("element type:", vector.element_type) -run(testShapedType) +run(testAbstractShapedType) # CHECK-LABEL: TEST: testVectorType def testVectorType(): From 70523ecfaca692bf5d0192e466c34ae7514624ea Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Mon, 7 Sep 2020 02:27:11 -0400 Subject: [PATCH 300/465] [Sparc] Select the UltraSPARC instruction set with the external assembler Select the UltraSPARC instruction set with the external assembler on Linux / FreeBSD / OpenBSD, matches GCC. 
--- clang/lib/Driver/ToolChains/Arch/Sparc.cpp | 9 ++++++++- clang/test/Driver/freebsd.c | 2 +- clang/test/Driver/linux-as.c | 4 ++-- clang/test/Driver/openbsd.c | 2 +- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp index 043b7f257c01d..70ba8eb2a7d0d 100644 --- a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp @@ -21,12 +21,19 @@ using namespace llvm::opt; const char *sparc::getSparcAsmModeForCPU(StringRef Name, const llvm::Triple &Triple) { if (Triple.getArch() == llvm::Triple::sparcv9) { + const char *DefV9CPU; + + if (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD()) + DefV9CPU = "-Av9a"; + else + DefV9CPU = "-Av9"; + return llvm::StringSwitch(Name) .Case("niagara", "-Av9b") .Case("niagara2", "-Av9b") .Case("niagara3", "-Av9d") .Case("niagara4", "-Av9d") - .Default("-Av9"); + .Default(DefV9CPU); } else { return llvm::StringSwitch(Name) .Case("v8", "-Av8") diff --git a/clang/test/Driver/freebsd.c b/clang/test/Driver/freebsd.c index 769bb22da0dc7..1bf6dab802a1c 100644 --- a/clang/test/Driver/freebsd.c +++ b/clang/test/Driver/freebsd.c @@ -176,7 +176,7 @@ // RUN: %clang -mcpu=ultrasparc -target sparc64-unknown-freebsd8 %s -### -no-integrated-as 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-SPARC-CPU %s // CHECK-SPARC-CPU: cc1{{.*}}" "-target-cpu" "ultrasparc" -// CHECK-SPARC-CPU: as{{.*}}" "-Av9 +// CHECK-SPARC-CPU: as{{.*}}" "-Av9a // Check that -G flags are passed to the linker for mips // RUN: %clang -target mips-unknown-freebsd %s -### -G0 2>&1 \ diff --git a/clang/test/Driver/linux-as.c b/clang/test/Driver/linux-as.c index 77ac05f30942c..0959bd7ba0a11 100644 --- a/clang/test/Driver/linux-as.c +++ b/clang/test/Driver/linux-as.c @@ -168,7 +168,7 @@ // RUN: | FileCheck -check-prefix=CHECK-SPARCV9 %s // CHECK-SPARCV9: as // CHECK-SPARCV9: -64 -// CHECK-SPARCV9: -Av9 +// CHECK-SPARCV9: -Av9a // 
CHECK-SPARCV9-NOT: -KPIC // CHECK-SPARCV9: -o // @@ -177,7 +177,7 @@ // RUN: | FileCheck -check-prefix=CHECK-SPARCV9PIC %s // CHECK-SPARCV9PIC: as // CHECK-SPARCV9PIC: -64 -// CHECK-SPARCV9PIC: -Av9 +// CHECK-SPARCV9PIC: -Av9a // CHECK-SPARCV9PIC: -KPIC // CHECK-SPARCV9PIC: -o // diff --git a/clang/test/Driver/openbsd.c b/clang/test/Driver/openbsd.c index 203b4b4a2ff0f..ae1aa64416907 100644 --- a/clang/test/Driver/openbsd.c +++ b/clang/test/Driver/openbsd.c @@ -70,7 +70,7 @@ // RUN: | FileCheck -check-prefix=CHECK-MIPS64EL-PIC %s // CHECK-AMD64-M32: as{{.*}}" "--32" // CHECK-POWERPC: as{{.*}}" "-mppc" "-many" -// CHECK-SPARC64: as{{.*}}" "-64" "-Av9" +// CHECK-SPARC64: as{{.*}}" "-64" "-Av9a" // CHECK-MIPS64: as{{.*}}" "-mabi" "64" "-EB" // CHECK-MIPS64-PIC: as{{.*}}" "-mabi" "64" "-EB" "-KPIC" // CHECK-MIPS64EL: as{{.*}}" "-mabi" "64" "-EL" From b3205e2ace4378600dedba0cc5a42b481c4e22c9 Mon Sep 17 00:00:00 2001 From: Tomas Rix Date: Mon, 7 Sep 2020 09:10:15 +0200 Subject: [PATCH 301/465] [scan-view] Explicitly use utf-8 in send_string send_patched_file decodes with utf-8. The default encoder for python 2 is ascii. So it is necessary to also change send_string to use utf-8. 
Differential Revision: https://reviews.llvm.org/D83984 --- clang/tools/scan-view/share/ScanView.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/tools/scan-view/share/ScanView.py b/clang/tools/scan-view/share/ScanView.py index a6cc7692ffe00..5a5d15e85b30c 100644 --- a/clang/tools/scan-view/share/ScanView.py +++ b/clang/tools/scan-view/share/ScanView.py @@ -744,7 +744,7 @@ def send_file(self, f, ctype): return f def send_string(self, s, ctype='text/html', headers=True, mtime=None): - encoded_s = s.encode() + encoded_s = s.encode('utf-8') if headers: self.send_response(200) self.send_header("Content-type", ctype) From 80186e4efc92aaa0c279846a438950c7bbe1e022 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sat, 5 Sep 2020 18:21:05 +0300 Subject: [PATCH 302/465] [CMake][TableGen] Simplify code by using list(TRANSFORM) LLVM requires CMake 3.13.4 so now we can simplify the code. Reviewed By: phosek Differential Revision: https://reviews.llvm.org/D87193 --- llvm/cmake/modules/TableGen.cmake | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index 73c1e96d3d9ad..d58ee1de043fe 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -80,14 +80,6 @@ function(tablegen project ofn) set(tblgen_change_flag "--write-if-changed") endif() - # With CMake 3.12 this can be reduced to: - # get_directory_property(tblgen_includes "INCLUDE_DIRECTORIES") - # list(TRANSFORM tblgen_includes PREPEND -I) - set(tblgen_includes) - get_directory_property(includes "INCLUDE_DIRECTORIES") - foreach(include ${includes}) - list(APPEND tblgen_includes -I ${include}) - endforeach() # We need both _TABLEGEN_TARGET and _TABLEGEN_EXE in the DEPENDS list # (both the target and the file) to have .inc files rebuilt on # a tablegen change, as cmake does not propagate file-level dependencies @@ -97,6 +89,9 @@ function(tablegen project ofn) # dependency 
twice in the result file when # ("${${project}_TABLEGEN_TARGET}" STREQUAL "${${project}_TABLEGEN_EXE}") # but lets us having smaller and cleaner code here. + get_directory_property(tblgen_includes INCLUDE_DIRECTORIES) + list(TRANSFORM tblgen_includes PREPEND -I) + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR} ${tblgen_includes} From 098130fa403a82f2a425761bbccdede022fac3ff Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sat, 5 Sep 2020 18:15:32 +0300 Subject: [PATCH 303/465] [CMake][Polly] Remove dead CMake code LLVM requires CMake 3.13.4 so remove code behind checks for an older version. Reviewed By: phosek Differential Revision: https://reviews.llvm.org/D87192 --- polly/cmake/polly_macros.cmake | 15 --------------- polly/lib/External/CMakeLists.txt | 3 +-- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/polly/cmake/polly_macros.cmake b/polly/cmake/polly_macros.cmake index 86de6f10686eb..518a09b45a420 100644 --- a/polly/cmake/polly_macros.cmake +++ b/polly/cmake/polly_macros.cmake @@ -72,21 +72,6 @@ macro(add_polly_loadable_module name) endif() endmacro(add_polly_loadable_module) -# Use C99-compatible compile mode for all C source files of a target. -function(target_enable_c99 _target) - if(CMAKE_VERSION VERSION_GREATER "3.1") - set_target_properties("${_target}" PROPERTIES C_STANDARD 99) - elseif(CMAKE_COMPILER_IS_GNUCC) - get_target_property(_sources "${_target}" SOURCES) - foreach(_file IN LISTS _sources) - get_source_file_property(_lang "${_file}" LANGUAGE) - if(_lang STREQUAL "C") - set_source_files_properties(${_file} COMPILE_FLAGS "-std=gnu99") - endif() - endforeach() - endif() -endfunction() - # Recursive helper for setup_source_group. 
Traverse the file system and add # source files matching the glob_expr to the prefix, recursing into # subdirectories as they are encountered diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt index 1039079cb49ca..c953ea48475d7 100644 --- a/polly/lib/External/CMakeLists.txt +++ b/polly/lib/External/CMakeLists.txt @@ -293,8 +293,7 @@ if (POLLY_BUNDLED_ISL) ) # ISL requires at least C99 to compile. gcc < 5.0 use -std=gnu89 as default. - target_enable_c99(PollyISL) - target_enable_c99(polly-isl-test) + set_property(TARGET PollyISL polly-isl-test PROPERTY C_STANDARD 99) endif (POLLY_BUNDLED_ISL) set(PET_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pet") From 21c0e74c9e7fa33153c484a6dabf33b38aede0d1 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sat, 5 Sep 2020 18:11:34 +0300 Subject: [PATCH 304/465] [CMake][OpenMP] Remove old dead CMake code LLVM requires CMake 3.13.4 so remove code behind checks for an older version. Reviewed By: phosek Differential Revision: https://reviews.llvm.org/D87191 --- openmp/cmake/OpenMPTesting.cmake | 9 +--- .../cmake/LibompCheckFortranFlag.cmake | 47 +------------------ 2 files changed, 2 insertions(+), 54 deletions(-) diff --git a/openmp/cmake/OpenMPTesting.cmake b/openmp/cmake/OpenMPTesting.cmake index 90e0704c4a94c..1d46b141ffdf8 100644 --- a/openmp/cmake/OpenMPTesting.cmake +++ b/openmp/cmake/OpenMPTesting.cmake @@ -50,13 +50,6 @@ endfunction() if (${OPENMP_STANDALONE_BUILD}) find_standalone_test_dependencies() - # Make sure we can use the console pool for recent CMake and Ninja > 1.5. - if (CMAKE_VERSION VERSION_LESS 3.1.20141117) - set(cmake_3_2_USES_TERMINAL) - else() - set(cmake_3_2_USES_TERMINAL USES_TERMINAL) - endif() - # Set lit arguments. 
set(DEFAULT_LIT_ARGS "-sv --show-unsupported --show-xfail") if (MSVC OR XCODE) @@ -189,7 +182,7 @@ function(add_openmp_testsuite target comment) COMMAND ${PYTHON_EXECUTABLE} ${OPENMP_LLVM_LIT_EXECUTABLE} ${LIT_ARGS} ${ARG_UNPARSED_ARGUMENTS} COMMENT ${comment} DEPENDS ${ARG_DEPENDS} - ${cmake_3_2_USES_TERMINAL} + USES_TERMINAL ) else() if (ARG_EXCLUDE_FROM_CHECK_ALL) diff --git a/openmp/runtime/cmake/LibompCheckFortranFlag.cmake b/openmp/runtime/cmake/LibompCheckFortranFlag.cmake index 21837ef068e36..b8cdb28a4bf28 100644 --- a/openmp/runtime/cmake/LibompCheckFortranFlag.cmake +++ b/openmp/runtime/cmake/LibompCheckFortranFlag.cmake @@ -19,54 +19,9 @@ function(libomp_check_fortran_flag flag boolean) print *, \"Hello World!\" end program hello") - set(failed_regexes "[Ee]rror;[Uu]nknown;[Ss]kipping") - if(CMAKE_VERSION VERSION_GREATER 3.1 OR CMAKE_VERSION VERSION_EQUAL 3.1) + set(failed_regexes "[Ee]rror;[Uu]nknown;[Ss]kipping") include(CheckFortranSourceCompiles) check_fortran_source_compiles("${fortran_source}" ${boolean} FAIL_REGEX "${failed_regexes}") set(${boolean} ${${boolean}} PARENT_SCOPE) - return() - else() - # Our manual check for cmake versions that don't have CheckFortranSourceCompiles - set(base_dir ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/fortran_flag_check) - file(MAKE_DIRECTORY ${base_dir}) - file(WRITE ${base_dir}/fortran_source.f "${fortran_source}") - - message(STATUS "Performing Test ${boolean}") - execute_process( - COMMAND ${CMAKE_Fortran_COMPILER} "${flag}" ${base_dir}/fortran_source.f - WORKING_DIRECTORY ${base_dir} - RESULT_VARIABLE exit_code - OUTPUT_VARIABLE OUTPUT - ERROR_VARIABLE OUTPUT - ) - - if(${exit_code} EQUAL 0) - foreach(regex IN LISTS failed_regexes) - if("${OUTPUT}" MATCHES ${regex}) - set(retval FALSE) - endif() - endforeach() - else() - set(retval FALSE) - endif() - - if(${retval}) - set(${boolean} 1 CACHE INTERNAL "Test ${boolean}") - message(STATUS "Performing Test ${boolean} - Success") - file(APPEND 
${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log - "Performing Fortran Compiler Flag test ${boolean} succeeded with the following output:\n" - "${OUTPUT}\n" - "Source file was:\n${fortran_source}\n") - else() - set(${boolean} "" CACHE INTERNAL "Test ${boolean}") - message(STATUS "Performing Test ${boolean} - Failed") - file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log - "Performing Fortran Compiler Flag test ${boolean} failed with the following output:\n" - "${OUTPUT}\n" - "Source file was:\n${fortran_source}\n") - endif() - endif() - - set(${boolean} ${retval} PARENT_SCOPE) endif() endfunction() From f4835b94f2cfc89e430263d1807b118e0e937f4d Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sat, 5 Sep 2020 18:05:14 +0300 Subject: [PATCH 305/465] [CMake][TableGen] Remove dead CMake version checks LLVM requires CMake 3.13.4, so remove version checks that are dead code. Reviewed By: phosek Differential Revision: https://reviews.llvm.org/D87190 --- llvm/cmake/modules/TableGen.cmake | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index d58ee1de043fe..5f07acc1f6922 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -8,9 +8,8 @@ function(tablegen project ofn) message(FATAL_ERROR "${project}_TABLEGEN_EXE not set") endif() - # Use depfile instead of globbing arbitrary *.td(s) - # DEPFILE is available for Ninja Generator with CMake>=3.7. - if(CMAKE_GENERATOR STREQUAL "Ninja" AND NOT CMAKE_VERSION VERSION_LESS 3.7) + # Use depfile instead of globbing arbitrary *.td(s) for Ninja. + if(CMAKE_GENERATOR STREQUAL "Ninja") # Make output path relative to build.ninja, assuming located on # ${CMAKE_BINARY_DIR}. 
# CMake emits build targets as relative paths but Ninja doesn't identify @@ -134,8 +133,8 @@ macro(add_tablegen target project) set(${target}_OLD_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS}) set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen) - # CMake-3.9 doesn't let compilation units depend on their dependent libraries. - if(NOT (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT CMAKE_VERSION VERSION_LESS 3.9) AND NOT XCODE) + # CMake doesn't let compilation units depend on their dependent libraries on some generators. + if(NOT CMAKE_GENERATOR STREQUAL "Ninja" AND NOT XCODE) # FIXME: It leaks to user, callee of add_tablegen. set(LLVM_ENABLE_OBJLIB ON) endif() From 04ea680a8ccc4f9a4d7333cd712333960348c35b Mon Sep 17 00:00:00 2001 From: Kristina Bessonova Date: Mon, 7 Sep 2020 10:03:32 +0200 Subject: [PATCH 306/465] [cmake] Fix build of attribute plugin example on Windows Seems '${cmake_2_8_12_PRIVATE}' was removed a long time ago, so it should be just PRIVATE keyword here. Reviewed By: john.brawn Differential Revision: https://reviews.llvm.org/D86091 --- clang/examples/Attribute/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/examples/Attribute/CMakeLists.txt b/clang/examples/Attribute/CMakeLists.txt index ed02f5e5992f5..42f04f5039bc7 100644 --- a/clang/examples/Attribute/CMakeLists.txt +++ b/clang/examples/Attribute/CMakeLists.txt @@ -1,7 +1,7 @@ add_llvm_library(Attribute MODULE Attribute.cpp PLUGIN_TOOL clang) if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN)) - target_link_libraries(Attribute ${cmake_2_8_12_PRIVATE} + target_link_libraries(Attribute PRIVATE clangAST clangBasic clangFrontend From 5350e1b5096aa4707aa525baf7398d93b4a4f1a5 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 2 Sep 2020 16:01:48 +0100 Subject: [PATCH 307/465] [KnownBits] Implement accurate unsigned and signed max and min Use the new implementation in ValueTracking, SelectionDAG and GlobalISel. 
Differential Revision: https://reviews.llvm.org/D87034 --- llvm/include/llvm/Support/KnownBits.h | 16 +++++ llvm/lib/Analysis/ValueTracking.cpp | 70 +++++++------------ .../lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 36 +++++----- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 27 ++----- llvm/lib/Support/KnownBits.cpp | 62 ++++++++++++++++ .../CodeGen/GlobalISel/KnownBitsTest.cpp | 4 +- llvm/unittests/Support/KnownBitsTest.cpp | 44 ++++++++++-- 7 files changed, 169 insertions(+), 90 deletions(-) diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index 5b3de63cd359a..a29e150b904a3 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -173,6 +173,10 @@ struct KnownBits { One.extractBits(NumBits, BitPosition)); } + /// Return KnownBits based on this, but updated given that the underlying + /// value is known to be greater than or equal to Val. + KnownBits makeGE(const APInt &Val) const; + /// Returns the minimum number of trailing zero bits. unsigned countMinTrailingZeros() const { return Zero.countTrailingOnes(); @@ -241,6 +245,18 @@ struct KnownBits { static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS, KnownBits RHS); + /// Compute known bits for umax(LHS, RHS). + static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for umin(LHS, RHS). + static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for smax(LHS, RHS). + static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for smin(LHS, RHS). + static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS); + /// Insert the bits from a smaller known bits starting at bitPosition. 
void insertBits(const KnownBits &SubBits, unsigned BitPosition) { Zero.insertBits(SubBits.Zero, BitPosition); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 3a6ee355c646d..6e5a7195bb194 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1212,59 +1212,41 @@ static void computeKnownBitsFromOperator(const Operator *I, if (SelectPatternResult::isMinOrMax(SPF)) { computeKnownBits(RHS, Known, Depth + 1, Q); computeKnownBits(LHS, Known2, Depth + 1, Q); - } else { - computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); - computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + switch (SPF) { + default: + llvm_unreachable("Unhandled select pattern flavor!"); + case SPF_SMAX: + Known = KnownBits::smax(Known, Known2); + break; + case SPF_SMIN: + Known = KnownBits::smin(Known, Known2); + break; + case SPF_UMAX: + Known = KnownBits::umax(Known, Known2); + break; + case SPF_UMIN: + Known = KnownBits::umin(Known, Known2); + break; + } + break; } - unsigned MaxHighOnes = 0; - unsigned MaxHighZeros = 0; - if (SPF == SPF_SMAX) { - // If both sides are negative, the result is negative. - if (Known.isNegative() && Known2.isNegative()) - // We can derive a lower bound on the result by taking the max of the - // leading one bits. - MaxHighOnes = - std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes()); - // If either side is non-negative, the result is non-negative. - else if (Known.isNonNegative() || Known2.isNonNegative()) - MaxHighZeros = 1; - } else if (SPF == SPF_SMIN) { - // If both sides are non-negative, the result is non-negative. - if (Known.isNonNegative() && Known2.isNonNegative()) - // We can derive an upper bound on the result by taking the max of the - // leading zero bits. - MaxHighZeros = std::max(Known.countMinLeadingZeros(), - Known2.countMinLeadingZeros()); - // If either side is negative, the result is negative. 
- else if (Known.isNegative() || Known2.isNegative()) - MaxHighOnes = 1; - } else if (SPF == SPF_UMAX) { - // We can derive a lower bound on the result by taking the max of the - // leading one bits. - MaxHighOnes = - std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes()); - } else if (SPF == SPF_UMIN) { - // We can derive an upper bound on the result by taking the max of the - // leading zero bits. - MaxHighZeros = - std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); - } else if (SPF == SPF_ABS) { + computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + + if (SPF == SPF_ABS) { // RHS from matchSelectPattern returns the negation part of abs pattern. // If the negate has an NSW flag we can assume the sign bit of the result // will be 0 because that makes abs(INT_MIN) undefined. if (match(RHS, m_Neg(m_Specific(LHS))) && Q.IIQ.hasNoSignedWrap(cast(RHS))) - MaxHighZeros = 1; + Known.Zero.setSignBit(); } - // Only known if known in both the LHS and RHS. 
- Known.One &= Known2.One; - Known.Zero &= Known2.Zero; - if (MaxHighOnes > 0) - Known.One.setHighBits(MaxHighOnes); - if (MaxHighZeros > 0) - Known.Zero.setHighBits(MaxHighZeros); break; } case Instruction::FPTrunc: diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index c615462af407e..3ebbac9fd659a 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -308,11 +308,24 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known, DemandedElts, Depth + 1); break; } - case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMIN: { + // TODO: Handle clamp pattern with number of sign bits + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, + Depth + 1); + Known = KnownBits::smin(Known, KnownRHS); + break; + } case TargetOpcode::G_SMAX: { // TODO: Handle clamp pattern with number of sign bits - computeKnownBitsMin(MI.getOperand(1).getReg(), MI.getOperand(2).getReg(), - Known, DemandedElts, Depth + 1); + KnownBits KnownRHS; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, + Depth + 1); + Known = KnownBits::smax(Known, KnownRHS); break; } case TargetOpcode::G_UMIN: { @@ -321,13 +334,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, DemandedElts, Depth + 1); computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, Depth + 1); - - // UMIN - we know that the result will have the maximum of the - // known zero leading bits of the inputs. 
- unsigned LeadZero = Known.countMinLeadingZeros(); - LeadZero = std::max(LeadZero, KnownRHS.countMinLeadingZeros()); - Known &= KnownRHS; - Known.Zero.setHighBits(LeadZero); + Known = KnownBits::umin(Known, KnownRHS); break; } case TargetOpcode::G_UMAX: { @@ -336,14 +343,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, DemandedElts, Depth + 1); computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, Depth + 1); - - // UMAX - we know that the result will have the maximum of the - // known one leading bits of the inputs. - unsigned LeadOne = Known.countMinLeadingOnes(); - LeadOne = std::max(LeadOne, KnownRHS.countMinLeadingOnes()); - Known.Zero &= KnownRHS.Zero; - Known.One &= KnownRHS.One; - Known.One.setHighBits(LeadOne); + Known = KnownBits::umax(Known, KnownRHS); break; } case TargetOpcode::G_FCMP: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 73e042c475402..d2b3e009c2026 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3390,29 +3390,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::UMIN: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - - // UMIN - we know that the result will have the maximum of the - // known zero leading bits of the inputs. 
- unsigned LeadZero = Known.countMinLeadingZeros(); - LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros()); - - Known.Zero &= Known2.Zero; - Known.One &= Known2.One; - Known.Zero.setHighBits(LeadZero); + Known = KnownBits::umin(Known, Known2); break; } case ISD::UMAX: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - - // UMAX - we know that the result will have the maximum of the - // known one leading bits of the inputs. - unsigned LeadOne = Known.countMinLeadingOnes(); - LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes()); - - Known.Zero &= Known2.Zero; - Known.One &= Known2.One; - Known.One.setHighBits(LeadOne); + Known = KnownBits::umax(Known, Known2); break; } case ISD::SMIN: @@ -3446,12 +3430,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } } - // Fallback - just get the shared known bits of the operands. Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Known.isUnknown()) break; // Early-out Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known.Zero &= Known2.Zero; - Known.One &= Known2.One; + if (IsMax) + Known = KnownBits::smax(Known, Known2); + else + Known = KnownBits::smin(Known, Known2); break; } case ISD::FrameIndex: diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 1ff66d504cbea..aad50e1240341 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -83,6 +83,68 @@ KnownBits KnownBits::computeForAddSub(bool Add, bool NSW, return KnownOut; } +KnownBits KnownBits::makeGE(const APInt &Val) const { + // Count the number of leading bit positions where our underlying value is + // known to be less than or equal to Val. + unsigned N = (Zero | Val).countLeadingOnes(); + + // For each of those bit positions, if Val has a 1 in that bit then our + // underlying value must also have a 1. 
+ APInt MaskedVal(Val); + MaskedVal.clearLowBits(getBitWidth() - N); + return KnownBits(Zero, One | MaskedVal); +} + +KnownBits KnownBits::umax(const KnownBits &LHS, const KnownBits &RHS) { + // If we can prove that LHS >= RHS then use LHS as the result. Likewise for + // RHS. Ideally our caller would already have spotted these cases and + // optimized away the umax operation, but we handle them here for + // completeness. + if (LHS.getMinValue().uge(RHS.getMaxValue())) + return LHS; + if (RHS.getMinValue().uge(LHS.getMaxValue())) + return RHS; + + // If the result of the umax is LHS then it must be greater than or equal to + // the minimum possible value of RHS. Likewise for RHS. Any known bits that + // are common to these two values are also known in the result. + KnownBits L = LHS.makeGE(RHS.getMinValue()); + KnownBits R = RHS.makeGE(LHS.getMinValue()); + return KnownBits(L.Zero & R.Zero, L.One & R.One); +} + +KnownBits KnownBits::umin(const KnownBits &LHS, const KnownBits &RHS) { + // Flip the range of values: [0, 0xFFFFFFFF] <-> [0xFFFFFFFF, 0] + auto Flip = [](KnownBits Val) { return KnownBits(Val.One, Val.Zero); }; + return Flip(umax(Flip(LHS), Flip(RHS))); +} + +KnownBits KnownBits::smax(const KnownBits &LHS, const KnownBits &RHS) { + // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0, 0xFFFFFFFF] + auto Flip = [](KnownBits Val) { + unsigned SignBitPosition = Val.getBitWidth() - 1; + APInt Zero = Val.Zero; + APInt One = Val.One; + Zero.setBitVal(SignBitPosition, Val.One[SignBitPosition]); + One.setBitVal(SignBitPosition, Val.Zero[SignBitPosition]); + return KnownBits(Zero, One); + }; + return Flip(umax(Flip(LHS), Flip(RHS))); +} + +KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) { + // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0xFFFFFFFF, 0] + auto Flip = [](KnownBits Val) { + unsigned SignBitPosition = Val.getBitWidth() - 1; + APInt Zero = Val.One; + APInt One = Val.Zero; + Zero.setBitVal(SignBitPosition, 
Val.Zero[SignBitPosition]); + One.setBitVal(SignBitPosition, Val.One[SignBitPosition]); + return KnownBits(Zero, One); + }; + return Flip(umax(Flip(LHS), Flip(RHS))); +} + KnownBits &KnownBits::operator&=(const KnownBits &RHS) { // Result bit is 0 if either operand bit is 0. Zero |= RHS.Zero; diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp index 30ff37536fafc..faf6f7087ac0c 100644 --- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp @@ -719,9 +719,9 @@ TEST_F(AArch64GISelMITest, TestKnownBitsUMax) { KnownBits KnownUmax = Info.getKnownBits(CopyUMax); EXPECT_EQ(64u, KnownUmax.getBitWidth()); - EXPECT_EQ(0u, KnownUmax.Zero.getZExtValue()); + EXPECT_EQ(0xffu, KnownUmax.Zero.getZExtValue()); EXPECT_EQ(0xffffffffffffff00, KnownUmax.One.getZExtValue()); - EXPECT_EQ(0u, KnownUmax.Zero.getZExtValue()); + EXPECT_EQ(0xffu, KnownUmax.Zero.getZExtValue()); EXPECT_EQ(0xffffffffffffff00, KnownUmax.One.getZExtValue()); } diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp index 694e5c4dcc712..89555a5881a53 100644 --- a/llvm/unittests/Support/KnownBitsTest.cpp +++ b/llvm/unittests/Support/KnownBitsTest.cpp @@ -103,13 +103,15 @@ TEST(KnownBitsTest, BinaryExhaustive) { unsigned Bits = 4; ForeachKnownBits(Bits, [&](const KnownBits &Known1) { ForeachKnownBits(Bits, [&](const KnownBits &Known2) { - KnownBits KnownAnd(Bits), KnownOr(Bits), KnownXor(Bits); + KnownBits KnownAnd(Bits); KnownAnd.Zero.setAllBits(); KnownAnd.One.setAllBits(); - KnownOr.Zero.setAllBits(); - KnownOr.One.setAllBits(); - KnownXor.Zero.setAllBits(); - KnownXor.One.setAllBits(); + KnownBits KnownOr(KnownAnd); + KnownBits KnownXor(KnownAnd); + KnownBits KnownUMax(KnownAnd); + KnownBits KnownUMin(KnownAnd); + KnownBits KnownSMax(KnownAnd); + KnownBits KnownSMin(KnownAnd); ForeachNumInKnownBits(Known1, [&](const APInt &N1) { 
ForeachNumInKnownBits(Known2, [&](const APInt &N2) { @@ -126,6 +128,22 @@ TEST(KnownBitsTest, BinaryExhaustive) { Res = N1 ^ N2; KnownXor.One &= Res; KnownXor.Zero &= ~Res; + + Res = APIntOps::umax(N1, N2); + KnownUMax.One &= Res; + KnownUMax.Zero &= ~Res; + + Res = APIntOps::umin(N1, N2); + KnownUMin.One &= Res; + KnownUMin.Zero &= ~Res; + + Res = APIntOps::smax(N1, N2); + KnownSMax.One &= Res; + KnownSMax.Zero &= ~Res; + + Res = APIntOps::smin(N1, N2); + KnownSMin.One &= Res; + KnownSMin.Zero &= ~Res; }); }); @@ -140,6 +158,22 @@ TEST(KnownBitsTest, BinaryExhaustive) { KnownBits ComputedXor = Known1 ^ Known2; EXPECT_EQ(KnownXor.Zero, ComputedXor.Zero); EXPECT_EQ(KnownXor.One, ComputedXor.One); + + KnownBits ComputedUMax = KnownBits::umax(Known1, Known2); + EXPECT_EQ(KnownUMax.Zero, ComputedUMax.Zero); + EXPECT_EQ(KnownUMax.One, ComputedUMax.One); + + KnownBits ComputedUMin = KnownBits::umin(Known1, Known2); + EXPECT_EQ(KnownUMin.Zero, ComputedUMin.Zero); + EXPECT_EQ(KnownUMin.One, ComputedUMin.One); + + KnownBits ComputedSMax = KnownBits::smax(Known1, Known2); + EXPECT_EQ(KnownSMax.Zero, ComputedSMax.Zero); + EXPECT_EQ(KnownSMax.One, ComputedSMax.One); + + KnownBits ComputedSMin = KnownBits::smin(Known1, Known2); + EXPECT_EQ(KnownSMin.Zero, ComputedSMin.Zero); + EXPECT_EQ(KnownSMin.One, ComputedSMin.One); }); }); } From a98b126696ef8edc42d193d2e03048cd0d61ebc2 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 7 Sep 2020 10:14:22 +0200 Subject: [PATCH 308/465] Add BinaryFormat/ELFRelocs/CSKY.def to LLVM modulemap --- llvm/include/llvm/module.modulemap | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap index 22959c62fc81e..b1d0a703850b9 100644 --- a/llvm/include/llvm/module.modulemap +++ b/llvm/include/llvm/module.modulemap @@ -65,6 +65,7 @@ module LLVM_BinaryFormat { textual header "BinaryFormat/ELFRelocs/ARC.def" textual header "BinaryFormat/ELFRelocs/AVR.def" textual header 
"BinaryFormat/ELFRelocs/BPF.def" + textual header "BinaryFormat/ELFRelocs/CSKY.def" textual header "BinaryFormat/ELFRelocs/Hexagon.def" textual header "BinaryFormat/ELFRelocs/i386.def" textual header "BinaryFormat/ELFRelocs/Lanai.def" From 40f4131fce787fe7a8596f06cef5fb6a06bf5ded Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Mon, 7 Sep 2020 16:16:38 +0800 Subject: [PATCH 309/465] [DWARFYAML] Make the debug_addr section optional. This patch makes the debug_addr section optional. When an empty debug_addr section is specified, yaml2obj only emits a section header for it. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D87205 --- llvm/include/llvm/ObjectYAML/DWARFYAML.h | 2 +- llvm/lib/ObjectYAML/DWARFEmitter.cpp | 2 +- llvm/lib/ObjectYAML/DWARFYAML.cpp | 2 +- .../test/tools/yaml2obj/ELF/DWARF/debug-addr.yaml | 15 +++++++++++++++ 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h index 19b7f3500ee67..99a7af87d2c78 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h +++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h @@ -215,7 +215,7 @@ struct Data { Optional> DebugStrOffsets; Optional> DebugAranges; std::vector DebugRanges; - std::vector DebugAddr; + Optional> DebugAddr; Optional PubNames; Optional PubTypes; diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp index a0a445ae0c9db..bf29f40579ceb 100644 --- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp +++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp @@ -594,7 +594,7 @@ Error DWARFYAML::emitDebugLine(raw_ostream &OS, const DWARFYAML::Data &DI) { } Error DWARFYAML::emitDebugAddr(raw_ostream &OS, const Data &DI) { - for (const AddrTableEntry &TableEntry : DI.DebugAddr) { + for (const AddrTableEntry &TableEntry : *DI.DebugAddr) { uint8_t AddrSize; if (TableEntry.AddrSize) AddrSize = *TableEntry.AddrSize; diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp 
index 046dddbf9a397..353e5058a0e5d 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -32,7 +32,7 @@ SetVector DWARFYAML::Data::getNonEmptySectionNames() const { SecNames.insert("debug_ranges"); if (!DebugLines.empty()) SecNames.insert("debug_line"); - if (!DebugAddr.empty()) + if (DebugAddr) SecNames.insert("debug_addr"); if (!DebugAbbrev.empty()) SecNames.insert("debug_abbrev"); diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-addr.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-addr.yaml index 52841e167b447..6a8dc84d98aa7 100644 --- a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-addr.yaml +++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-addr.yaml @@ -631,3 +631,18 @@ DWARF: [[SIZENAME]]: 3 Entries: - Address: 0x1234 + +## n) Test that the .debug_addr section header is emitted if the "debug_addr" +## entry is empty. + +# RUN: yaml2obj --docnum=12 %s -o %t12.o +# RUN: llvm-readobj --sections %t12.o | \ +# RUN: FileCheck %s -DSIZE=0 -DADDRALIGN=1 --check-prefix=SHDR + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +DWARF: + debug_addr: [] From 0af4147804aa0aa906a2ac913fe5639639afb9bb Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 25 Aug 2020 12:17:24 +0100 Subject: [PATCH 310/465] [ARM][CostModel] CodeSize costs for i1 arith ops When optimising for size, make the cost of i1 logical operations relatively expensive so that optimisations don't try to combine predicates. 
Differential Revision: https://reviews.llvm.org/D86525 --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 17 ++++++++++++++++- llvm/test/Analysis/CostModel/ARM/arith.ll | 6 +++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index f3206306a3b60..c789b35f32af5 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1039,13 +1039,28 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { + int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); + if (ST->isThumb() && CostKind == TTI::TCK_CodeSize && Ty->isIntegerTy(1)) { + // Make operations on i1 relatively expensive as this often involves + // combining predicates. AND and XOR should be easier to handle with IT + // blocks. + switch (ISDOpcode) { + default: + break; + case ISD::AND: + case ISD::XOR: + return 2; + case ISD::OR: + return 3; + } + } + // TODO: Handle more cost kinds. 
if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); - int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); if (ST->hasNEON()) { diff --git a/llvm/test/Analysis/CostModel/ARM/arith.ll b/llvm/test/Analysis/CostModel/ARM/arith.ll index 8513cefe5c119..a1a0d11cac2ec 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith.ll @@ -63,9 +63,9 @@ define void @i1() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i1 undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i1 undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = shl i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i1 undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i1 undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i1 undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %j = or i1 undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i1 undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %c = add i1 undef, undef From 713c2ad60c137a88c0a64cc98f2db4be702a25e9 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 27 Aug 2020 14:26:38 +0100 Subject: [PATCH 311/465] [GlobalISel] Extend not_cmp_fold to work on conditional expressions Differential Revision: https://reviews.llvm.org/D86709 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 4 +- 
.../include/llvm/Target/GlobalISel/Combine.td | 2 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 93 ++++++++++---- .../prelegalizercombiner-invert-cmp.mir | 118 ++++++++++++++++++ 4 files changed, 192 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index e1f0535affcdb..8607ad02d5063 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -356,8 +356,8 @@ class CombinerHelper { bool matchRedundantSExtInReg(MachineInstr &MI); /// Combine inverting a result of a compare into the opposite cond code. - bool matchNotCmp(MachineInstr &MI, Register &CmpReg); - bool applyNotCmp(MachineInstr &MI, Register &CmpReg); + bool matchNotCmp(MachineInstr &MI, SmallVectorImpl &RegsToNegate); + bool applyNotCmp(MachineInstr &MI, SmallVectorImpl &RegsToNegate); /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 4b0fe43c18684..6a6f97ae78b04 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -373,7 +373,7 @@ def ext_ext_fold: GICombineRule < (apply [{ return Helper.applyCombineExtOfExt(*${root}, ${matchinfo}); }]) >; -def not_cmp_fold_matchinfo : GIDefMatchData<"Register">; +def not_cmp_fold_matchinfo : GIDefMatchData<"SmallVector">; def not_cmp_fold : GICombineRule< (defs root:$d, not_cmp_fold_matchinfo:$info), (match (wip_match_opcode G_XOR): $d, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 6a89060805e09..10cd58f17e9aa 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2243,13 +2243,13 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, isConstTrueVal(TLI, Cst, IsVector, IsFP); } -bool CombinerHelper::matchNotCmp(MachineInstr &MI, Register &CmpReg) { +bool CombinerHelper::matchNotCmp(MachineInstr &MI, + SmallVectorImpl &RegsToNegate) { assert(MI.getOpcode() == TargetOpcode::G_XOR); LLT Ty = MRI.getType(MI.getOperand(0).getReg()); const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); Register XorSrc; Register CstReg; - int64_t Cst; // We match xor(src, true) here. if (!mi_match(MI.getOperand(0).getReg(), MRI, m_GXor(m_Reg(XorSrc), m_Reg(CstReg)))) @@ -2258,15 +2258,51 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI, Register &CmpReg) { if (!MRI.hasOneNonDBGUse(XorSrc)) return false; - // Now try match src to either icmp or fcmp. + // Check that XorSrc is the root of a tree of comparisons combined with ANDs + // and ORs. The suffix of RegsToNegate starting from index I is used a work + // list of tree nodes to visit. 
+ RegsToNegate.push_back(XorSrc); + // Remember whether the comparisons are all integer or all floating point. + bool IsInt = false; bool IsFP = false; - if (!mi_match(XorSrc, MRI, m_GICmp(m_Pred(), m_Reg(), m_Reg()))) { - // Try fcmp. - if (!mi_match(XorSrc, MRI, m_GFCmp(m_Pred(), m_Reg(), m_Reg()))) + for (unsigned I = 0; I < RegsToNegate.size(); ++I) { + Register Reg = RegsToNegate[I]; + if (!MRI.hasOneNonDBGUse(Reg)) + return false; + MachineInstr *Def = MRI.getVRegDef(Reg); + switch (Def->getOpcode()) { + default: + // Don't match if the tree contains anything other than ANDs, ORs and + // comparisons. return false; - IsFP = true; + case TargetOpcode::G_ICMP: + if (IsFP) + return false; + IsInt = true; + // When we apply the combine we will invert the predicate. + break; + case TargetOpcode::G_FCMP: + if (IsInt) + return false; + IsFP = true; + // When we apply the combine we will invert the predicate. + break; + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + // Implement De Morgan's laws: + // ~(x & y) -> ~x | ~y + // ~(x | y) -> ~x & ~y + // When we apply the combine we will change the opcode and recursively + // negate the operands. + RegsToNegate.push_back(Def->getOperand(1).getReg()); + RegsToNegate.push_back(Def->getOperand(2).getReg()); + break; + } } + // Now we know whether the comparisons are integer or floating point, check + // the constant in the xor. 
+ int64_t Cst; if (Ty.isVector()) { MachineInstr *CstDef = MRI.getVRegDef(CstReg); auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI); @@ -2281,25 +2317,38 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI, Register &CmpReg) { return false; } - CmpReg = XorSrc; return true; } -bool CombinerHelper::applyNotCmp(MachineInstr &MI, Register &CmpReg) { - MachineInstr *CmpDef = MRI.getVRegDef(CmpReg); - assert(CmpDef && "Should have been given an MI reg"); - assert(CmpDef->getOpcode() == TargetOpcode::G_ICMP || - CmpDef->getOpcode() == TargetOpcode::G_FCMP); - - Observer.changingInstr(*CmpDef); - MachineOperand &PredOp = CmpDef->getOperand(1); - CmpInst::Predicate NewP = CmpInst::getInversePredicate( - (CmpInst::Predicate)PredOp.getPredicate()); - PredOp.setPredicate(NewP); - Observer.changedInstr(*CmpDef); +bool CombinerHelper::applyNotCmp(MachineInstr &MI, + SmallVectorImpl<Register> &RegsToNegate) { + for (Register Reg : RegsToNegate) { + MachineInstr *Def = MRI.getVRegDef(Reg); + Observer.changingInstr(*Def); + // For each comparison, invert the opcode. For each AND and OR, change the + // opcode. 
+ switch (Def->getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_ICMP: + case TargetOpcode::G_FCMP: { + MachineOperand &PredOp = Def->getOperand(1); + CmpInst::Predicate NewP = CmpInst::getInversePredicate( + (CmpInst::Predicate)PredOp.getPredicate()); + PredOp.setPredicate(NewP); + break; + } + case TargetOpcode::G_AND: + Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR)); + break; + case TargetOpcode::G_OR: + Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND)); + break; + } + Observer.changedInstr(*Def); + } - replaceRegWith(MRI, MI.getOperand(0).getReg(), - CmpDef->getOperand(0).getReg()); + replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-invert-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-invert-cmp.mir index 3356206c4cfca..93f8e4284cd4c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-invert-cmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-invert-cmp.mir @@ -164,3 +164,121 @@ body: | $q0 = COPY %5(<4 x s32>) RET_ReallyLR implicit $q0 ... 
+--- +name: icmp_and_icmp +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: icmp_and_icmp + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s64), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[COPY]](s64), [[C]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s64) = COPY $x0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s1) = G_CONSTANT i1 1 + %3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1 + %4:_(s1) = G_ICMP intpred(ugt), %0(s64), %1 + %5:_(s1) = G_AND %3, %4 + %6:_(s1) = G_XOR %5, %2 + %7:_(s32) = G_ANYEXT %6 + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 +... +--- +name: icmp_or_icmp +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: icmp_or_icmp + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s64), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[COPY]](s64), [[C]] + ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s64) = COPY $x0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s1) = G_CONSTANT i1 1 + %3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1 + %4:_(s1) = G_ICMP intpred(ugt), %0(s64), %1 + %5:_(s1) = G_OR %3, %4 + %6:_(s1) = G_XOR %5, %2 + %7:_(s32) = G_ANYEXT %6 + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 +... 
+--- +name: icmp_and_icmp_or_icmp +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: icmp_and_icmp_or_icmp + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s64), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[COPY]](s64), [[C]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP1]] + ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[C]] + ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[OR]], [[ICMP2]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s64) = COPY $x0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s1) = G_CONSTANT i1 1 + %3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1 + %4:_(s1) = G_ICMP intpred(ugt), %0(s64), %1 + %5:_(s1) = G_AND %3, %4 + %6:_(s1) = G_ICMP intpred(ne), %0(s64), %1 + %7:_(s1) = G_OR %5, %6 + %8:_(s1) = G_XOR %7, %2 + %9:_(s32) = G_ANYEXT %8 + $w0 = COPY %9(s32) + RET_ReallyLR implicit $w0 +... 
+--- +name: icmp_and_trunc +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: icmp_and_trunc + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[C]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[TRUNC]] + ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[AND]], [[C1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s64) = COPY $x0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s1) = G_CONSTANT i1 1 + %3:_(s1) = G_ICMP intpred(sgt), %0(s64), %1 + %4:_(s1) = G_TRUNC %0(s64) + %5:_(s1) = G_AND %3, %4 + %6:_(s1) = G_XOR %5, %2 + %7:_(s32) = G_ANYEXT %6 + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 +... From 65f78e73ad574bb73bb625c787850acd261ba53a Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 7 Sep 2020 09:08:07 +0100 Subject: [PATCH 312/465] [SimplifyCFG] Consider cost of combining predicates. Modify FoldBranchToCommonDest to consider the cost of inserting instructions when attempting to combine predicates to fold blocks. The threshold can be controlled via a new option: -simplifycfg-branch-fold-threshold which defaults to '2' to allow the insertion of a not and another logical operator. 
Differential Revision: https://reviews.llvm.org/D86526 --- llvm/include/llvm/Transforms/Utils/Local.h | 1 + llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 28 ++- .../SimplifyCFG/ARM/branch-fold-threshold.ll | 162 ++++++++++++------ 3 files changed, 137 insertions(+), 54 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index 5ab2dd496282f..fb6f0269a0ac2 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -199,6 +199,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr); /// branches to us and one of our successors, fold the setcc into the /// predecessor and use logical operations to pick the right destination. bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr, + const TargetTransformInfo *TTI = nullptr, unsigned BonusInstThreshold = 1); /// This function takes a virtual register computed by an Instruction and diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index ae2471969160c..124a7c423e72c 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -143,6 +143,13 @@ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through")); +// Two is chosen to allow one negation and a logical combine. 
+static cl::opt<unsigned> + BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, + cl::init(2), + cl::desc("Maximum cost of combining conditions when " + "folding branches")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -2684,12 +2691,16 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, + const TargetTransformInfo *TTI, unsigned BonusInstThreshold) { BasicBlock *BB = BI->getParent(); const unsigned PredCount = pred_size(BB); bool Changed = false; + TargetTransformInfo::TargetCostKind CostKind = + BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize + : TargetTransformInfo::TCK_SizeAndLatency; Instruction *Cond = nullptr; if (BI->isConditional()) @@ -2818,6 +2829,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, continue; } + // Check the cost of inserting the necessary logic before performing the + // transformation. + if (TTI && Opc != Instruction::BinaryOpsEnd) { + Type *Ty = BI->getCondition()->getType(); + unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind); + if (InvertPredCond && (!PBI->getCondition()->hasOneUse() || + !isa<CmpInst>(PBI->getCondition()))) + Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind); + + if (Cost > BranchFoldThreshold) + continue; + } + LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); Changed = true; @@ -6013,7 +6037,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. 
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); return false; } @@ -6076,7 +6100,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) + if (FoldBranchToCommonDest(BI, nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); // We have a conditional branch to two blocks that are only reachable diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll b/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll index 2bcbaff50a973..ffb13ca583f7f 100644 --- a/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll +++ b/llvm/test/Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll @@ -169,19 +169,34 @@ cond.end: } define i32 @or_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 { -; CHECK-LABEL: @or_predicate_minsize( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[COND]] +; THUMB-LABEL: @or_predicate_minsize( +; THUMB-NEXT: entry: +; THUMB-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3 +; THUMB-NEXT: br i1 [[CMP]], label 
[[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]] +; THUMB: lor.lhs.false: +; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]] +; THUMB: cond.false: +; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; THUMB-NEXT: br label [[COND_END]] +; THUMB: cond.end: +; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; THUMB-NEXT: ret i32 [[COND]] +; +; ARM-LABEL: @or_predicate_minsize( +; ARM-NEXT: entry: +; ARM-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3 +; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] +; ARM: cond.false: +; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; ARM-NEXT: br label [[COND_END]] +; ARM: cond.end: +; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; ARM-NEXT: ret i32 [[COND]] ; entry: %cmp = icmp sgt i32 %d, 3 @@ -202,19 +217,34 @@ cond.end: } define i32 @or_invert_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) #0 { -; CHECK-LABEL: @or_invert_predicate_minsize( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 
[[COND]] +; THUMB-LABEL: @or_invert_predicate_minsize( +; THUMB-NEXT: entry: +; THUMB-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 3 +; THUMB-NEXT: br i1 [[CMP]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]] +; THUMB: lor.lhs.false: +; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]] +; THUMB: cond.false: +; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; THUMB-NEXT: br label [[COND_END]] +; THUMB: cond.end: +; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; THUMB-NEXT: ret i32 [[COND]] +; +; ARM-LABEL: @or_invert_predicate_minsize( +; ARM-NEXT: entry: +; ARM-NEXT: [[CMP:%.*]] = icmp sle i32 [[D:%.*]], 3 +; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] +; ARM: cond.false: +; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; ARM-NEXT: br label [[COND_END]] +; ARM: cond.end: +; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; ARM-NEXT: ret i32 [[COND]] ; entry: %cmp = icmp sgt i32 %d, 3 @@ -267,19 +297,33 @@ cond.end: } define i32 @or_xor_predicate_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 { -; CHECK-LABEL: @or_xor_predicate_minsize( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* 
[[INPUT:%.*]], align 4 -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[COND]] +; THUMB-LABEL: @or_xor_predicate_minsize( +; THUMB-NEXT: entry: +; THUMB-NEXT: br i1 [[CMP:%.*]], label [[LOR_LHS_FALSE:%.*]], label [[COND_END:%.*]] +; THUMB: lor.lhs.false: +; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; THUMB-NEXT: br i1 [[CMP1]], label [[COND_END]], label [[COND_FALSE:%.*]] +; THUMB: cond.false: +; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; THUMB-NEXT: br label [[COND_END]] +; THUMB: cond.end: +; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; THUMB-NEXT: ret i32 [[COND]] +; +; ARM-LABEL: @or_xor_predicate_minsize( +; ARM-NEXT: entry: +; ARM-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true +; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; ARM-NEXT: [[OR_COND:%.*]] = or i1 [[CMP_NOT]], [[CMP1]] +; ARM-NEXT: br i1 [[OR_COND]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]] +; ARM: cond.false: +; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; ARM-NEXT: br label [[COND_END]] +; ARM: cond.end: +; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; ARM-NEXT: ret i32 [[COND]] ; entry: br i1 %cmp, label %lor.lhs.false, label %cond.end @@ -331,19 +375,33 @@ cond.end: } define i32 @and_xor_minsize(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input, i1 %cmp) #0 { -; CHECK-LABEL: @and_xor_minsize( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]] -; 
CHECK-NEXT: br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[COND]] +; THUMB-LABEL: @and_xor_minsize( +; THUMB-NEXT: entry: +; THUMB-NEXT: br i1 [[CMP:%.*]], label [[COND_END:%.*]], label [[LOR_LHS_FALSE:%.*]] +; THUMB: lor.lhs.false: +; THUMB-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; THUMB-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; THUMB-NEXT: br i1 [[CMP1]], label [[COND_FALSE:%.*]], label [[COND_END]] +; THUMB: cond.false: +; THUMB-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; THUMB-NEXT: br label [[COND_END]] +; THUMB: cond.end: +; THUMB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[LOR_LHS_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; THUMB-NEXT: ret i32 [[COND]] +; +; ARM-LABEL: @and_xor_minsize( +; ARM-NEXT: entry: +; ARM-NEXT: [[CMP_NOT:%.*]] = xor i1 [[CMP:%.*]], true +; ARM-NEXT: [[ADD:%.*]] = add nsw i32 [[C:%.*]], [[A:%.*]] +; ARM-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ADD]], [[B:%.*]] +; ARM-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_NOT]], [[CMP1]] +; ARM-NEXT: br i1 [[OR_COND]], label [[COND_FALSE:%.*]], label [[COND_END:%.*]] +; ARM: cond.false: +; ARM-NEXT: [[TMP0:%.*]] = load i32, i32* [[INPUT:%.*]], align 4 +; ARM-NEXT: br label [[COND_END]] +; ARM: cond.end: +; ARM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_FALSE]] ], [ 0, [[ENTRY:%.*]] ] +; ARM-NEXT: ret i32 [[COND]] ; entry: br i1 %cmp, label %cond.end, label %lor.lhs.false From e7bd058c7e2cb2c675a4b78ec770ea725bff8c64 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 7 Sep 2020 09:26:05 +0100 Subject: [PATCH 313/465] [clang-format] Allow configuring list of macros that map to attributes This adds a `AttributeMacros` configuration option that causes certain 
identifiers to be parsed like a __attribute__((foo)) annotation. This is motivated by our CHERI C/C++ fork which adds a __capability qualifier for pointer/reference. Without this change clang-format parses many type declarations as multiplications/bitwise-and instead. I initially considered adding "__capability" as a new clang-format keyword, but having a list of macros that should be treated as attributes is more flexible since it can be used e.g. for static analyzer annotations or other language extensions. Example: std::vector -> std::vector Depends on D86775 (to apply cleanly) Reviewed By: MyDeveloperDay, jrtc27 Differential Revision: https://reviews.llvm.org/D86782 --- clang/docs/ClangFormatStyleOptions.rst | 17 +++++ clang/include/clang/Format/Format.h | 19 ++++++ clang/lib/Format/Format.cpp | 2 + clang/lib/Format/FormatToken.h | 4 +- clang/lib/Format/FormatTokenLexer.cpp | 2 + clang/lib/Format/TokenAnnotator.cpp | 11 +-- clang/unittests/Format/FormatTest.cpp | 92 ++++++++++++++++++++++++-- 7 files changed, 137 insertions(+), 10 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index c35718b51248c..72a25032151ff 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -758,7 +758,24 @@ the configuration (without a prefix: ``Auto``). int bbbbbbbbbbbbbbbbbbbbb) { } +**AttributeMacros** (``std::vector``) + A vector of strings that should be interpreted as attributes/qualifiers + instead of identifiers. This can be useful for language extensions or + static analyzer annotations: + .. code-block:: c++ + + x = (char *__capability)&y; + int function(void) __ununsed; + void only_writes_to_buffer(char *__output buffer); + + In the .clang-format configuration file, this can be configured like: + + .. code-block:: yaml + + AttributeMacros: ['__capability', '__output', '__ununsed'] + + For example: __capability. 
**BinPackArguments** (``bool``) If ``false``, a function call's arguments will either be all on the diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 269eab971a2cb..6bb828d60071f 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -583,6 +583,24 @@ struct FormatStyle { /// The template declaration breaking style to use. BreakTemplateDeclarationsStyle AlwaysBreakTemplateDeclarations; + /// A vector of strings that should be interpreted as attributes/qualifiers + /// instead of identifiers. This can be useful for language extensions or + /// static analyzer annotations. + /// + /// For example: + /// \code + /// x = (char *__capability)&y; + /// int function(void) __ununsed; + /// void only_writes_to_buffer(char *__output buffer); + /// \endcode + /// + /// In the .clang-format configuration file, this can be configured like: + /// \code{.yaml} + /// AttributeMacros: ['__capability', '__output', '__ununsed'] + /// \endcode + /// + std::vector AttributeMacros; + /// If ``false``, a function call's arguments will either be all on the /// same line or will have one line each. 
/// \code @@ -2351,6 +2369,7 @@ struct FormatStyle { R.AlwaysBreakBeforeMultilineStrings && AlwaysBreakTemplateDeclarations == R.AlwaysBreakTemplateDeclarations && + AttributeMacros == R.AttributeMacros && BinPackArguments == R.BinPackArguments && BinPackParameters == R.BinPackParameters && BreakBeforeBinaryOperators == R.BreakBeforeBinaryOperators && diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index fe11cba9bfdf0..5dda2bda06b54 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -475,6 +475,7 @@ template <> struct MappingTraits { Style.AlwaysBreakBeforeMultilineStrings); IO.mapOptional("AlwaysBreakTemplateDeclarations", Style.AlwaysBreakTemplateDeclarations); + IO.mapOptional("AttributeMacros", Style.AttributeMacros); IO.mapOptional("BinPackArguments", Style.BinPackArguments); IO.mapOptional("BinPackParameters", Style.BinPackParameters); IO.mapOptional("BraceWrapping", Style.BraceWrapping); @@ -842,6 +843,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; LLVMStyle.AlwaysBreakTemplateDeclarations = FormatStyle::BTDS_MultiLine; + LLVMStyle.AttributeMacros.push_back("__capability"); LLVMStyle.BinPackArguments = true; LLVMStyle.BinPackParameters = true; LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index ad72a95062abe..795c268896294 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -29,6 +29,7 @@ namespace format { TYPE(ArrayInitializerLSquare) \ TYPE(ArraySubscriptLSquare) \ TYPE(AttributeColon) \ + TYPE(AttributeMacro) \ TYPE(AttributeParen) \ TYPE(AttributeSquare) \ TYPE(BinaryOperator) \ @@ -442,7 +443,8 @@ struct FormatToken { bool canBePointerOrReferenceQualifier() const { return isOneOf(tok::kw_const, tok::kw_restrict, tok::kw_volatile, 
tok::kw___attribute, tok::kw__Nonnull, tok::kw__Nullable, - tok::kw__Null_unspecified, tok::kw___ptr32, tok::kw___ptr64); + tok::kw__Null_unspecified, tok::kw___ptr32, tok::kw___ptr64, + TT_AttributeMacro); } /// Determine whether the token is a simple-type-specifier. diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 1fd153d1112eb..f6db58acd8dbe 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -39,6 +39,8 @@ FormatTokenLexer::FormatTokenLexer( for (const std::string &ForEachMacro : Style.ForEachMacros) Macros.insert({&IdentTable.get(ForEachMacro), TT_ForEachMacro}); + for (const std::string &AttributeMacro : Style.AttributeMacros) + Macros.insert({&IdentTable.get(AttributeMacro), TT_AttributeMacro}); for (const std::string &StatementMacro : Style.StatementMacros) Macros.insert({&IdentTable.get(StatementMacro), TT_StatementMacro}); for (const std::string &TypenameMacro : Style.TypenameMacros) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index f04f101f04593..fc6a226dc4a12 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1333,11 +1333,12 @@ class AnnotatingParser { // Reset token type in case we have already looked at it and then // recovered from an error (e.g. failure to find the matching >). 
if (!CurrentToken->isOneOf( - TT_LambdaLSquare, TT_LambdaLBrace, TT_ForEachMacro, - TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, - TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, TT_NamespaceMacro, - TT_OverloadedOperator, TT_RegexLiteral, TT_TemplateString, - TT_ObjCStringLiteral, TT_UntouchableMacroFunc)) + TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, + TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace, + TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_JsFatArrow, + TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator, + TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral, + TT_UntouchableMacroFunc)) CurrentToken->setType(TT_Unknown); CurrentToken->Role.reset(); CurrentToken->MatchingParen = nullptr; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index a2d694947990f..f224ab03271d9 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -8040,7 +8040,20 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("vector v;"); verifyFormat("vector v;"); verifyFormat("vector v;"); + verifyFormat("vector v;"); + FormatStyle CustomQualifier = getLLVMStyle(); + // Add indentifers that should not be parsed as a qualifier by default. 
+ CustomQualifier.AttributeMacros.push_back("__my_qualifier"); + CustomQualifier.AttributeMacros.push_back("_My_qualifier"); + CustomQualifier.AttributeMacros.push_back("my_other_qualifier"); + verifyFormat("vector parse_as_multiply;"); + verifyFormat("vector v;", CustomQualifier); + verifyFormat("vector parse_as_multiply;"); + verifyFormat("vector v;", CustomQualifier); + verifyFormat("vector parse_as_multiply;"); + verifyFormat("vector v;", CustomQualifier); verifyFormat("vector v;"); + verifyFormat("vector v;"); verifyFormat("vector v;"); verifyFormat("foo();"); verifyFormat("foo();"); @@ -8084,10 +8097,23 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyIndependentOfContext("MACRO(A *[[clang::attr(\"foo\")]] a);"); verifyIndependentOfContext("MACRO(A *__ptr32 a);"); verifyIndependentOfContext("MACRO(A *__ptr64 a);"); + verifyIndependentOfContext("MACRO(A *__capability);"); + verifyIndependentOfContext("MACRO(A &__capability);"); + verifyFormat("MACRO(A *__my_qualifier);"); // type declaration + verifyFormat("void f() { MACRO(A * __my_qualifier); }"); // multiplication + // If we add __my_qualifier to AttributeMacros it should always be parsed as + // a type declaration: + verifyFormat("MACRO(A *__my_qualifier);", CustomQualifier); + verifyFormat("void f() { MACRO(A *__my_qualifier); }", CustomQualifier); + verifyIndependentOfContext("MACRO('0' <= c && c <= '9');"); verifyFormat("void f() { f(float{1}, a * a); }"); // FIXME: Is there a way to make this work? 
// verifyIndependentOfContext("MACRO(A *a);"); + verifyFormat("MACRO(A &B);"); + verifyFormat("MACRO(A *B);"); + verifyFormat("void f() { MACRO(A * B); }"); + verifyFormat("void f() { MACRO(A & B); }"); verifyFormat("DatumHandle const *operator->() const { return input_; }"); verifyFormat("return options != nullptr && operator==(*options);"); @@ -8137,10 +8163,47 @@ TEST_F(FormatTest, UnderstandsAttributes) { verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa __attribute__((unused))\n" "aaaaaaaaaaaaaaaaaaaaaaa(int i);"); FormatStyle AfterType = getLLVMStyle(); - AfterType.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; + AfterType.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All; verifyFormat("__attribute__((nodebug)) void\n" "foo() {}\n", AfterType); + verifyFormat("__unused void\n" + "foo() {}", + AfterType); + + FormatStyle CustomAttrs = getLLVMStyle(); + CustomAttrs.AttributeMacros.push_back("__unused"); + CustomAttrs.AttributeMacros.push_back("__attr1"); + CustomAttrs.AttributeMacros.push_back("__attr2"); + CustomAttrs.AttributeMacros.push_back("no_underscore_attr"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); + // Check that it is parsed as a multiplication without AttributeMacros and + // as a pointer qualifier when we add __attr1/__attr2 to AttributeMacros. 
+ verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;"); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + verifyFormat("vector v;", CustomAttrs); + + // Check that these are not parsed as function declarations: + CustomAttrs.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; + CustomAttrs.BreakBeforeBraces = FormatStyle::BS_Allman; + verifyFormat("SomeType s(InitValue);", CustomAttrs); + verifyFormat("SomeType s{InitValue};", CustomAttrs); + verifyFormat("SomeType *__unused s(InitValue);", CustomAttrs); + verifyFormat("SomeType *__unused s{InitValue};", CustomAttrs); + verifyFormat("SomeType s __unused(InitValue);", CustomAttrs); + verifyFormat("SomeType s __unused{InitValue};", CustomAttrs); + verifyFormat("SomeType *__capability s(InitValue);", CustomAttrs); + verifyFormat("SomeType *__capability s{InitValue};", CustomAttrs); } TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { @@ -8157,6 +8220,7 @@ TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { verifyFormat("x = (foo *[[clang::attr(\"foo\")]])*v;"); verifyFormat("x = (foo *__ptr32)*v;"); verifyFormat("x = (foo *__ptr64)*v;"); + verifyFormat("x = (foo *__capability)*v;"); // Check that we handle multiple trailing qualifiers and skip them all to // determine that the expression is a cast to a pointer type. 
@@ -8165,7 +8229,7 @@ TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { LongPointerLeft.PointerAlignment = FormatStyle::PAS_Left; StringRef AllQualifiers = "const volatile restrict __attribute__((foo)) _Nonnull _Null_unspecified " - "_Nonnull [[clang::attr]] __ptr32 __ptr64"; + "_Nonnull [[clang::attr]] __ptr32 __ptr64 __capability"; verifyFormat(("x = (foo *" + AllQualifiers + ")*v;").str(), LongPointerRight); verifyFormat(("x = (foo* " + AllQualifiers + ")*v;").str(), LongPointerLeft); @@ -8173,6 +8237,20 @@ TEST_F(FormatTest, UnderstandsPointerQualifiersInCast) { verifyFormat("x = (foo *const)&v;"); verifyFormat(("x = (foo *" + AllQualifiers + ")&v;").str(), LongPointerRight); verifyFormat(("x = (foo* " + AllQualifiers + ")&v;").str(), LongPointerLeft); + + // Check custom qualifiers: + FormatStyle CustomQualifier = getLLVMStyleWithColumns(999); + CustomQualifier.AttributeMacros.push_back("__my_qualifier"); + verifyFormat("x = (foo * __my_qualifier) * v;"); // not parsed as qualifier. + verifyFormat("x = (foo *__my_qualifier)*v;", CustomQualifier); + verifyFormat(("x = (foo *" + AllQualifiers + " __my_qualifier)*v;").str(), + CustomQualifier); + verifyFormat(("x = (foo *" + AllQualifiers + " __my_qualifier)&v;").str(), + CustomQualifier); + + // Check that unknown identifiers result in binary operator parsing: + verifyFormat("x = (foo * __unknown_qualifier) * v;"); + verifyFormat("x = (foo * __unknown_qualifier) & v;"); } TEST_F(FormatTest, UnderstandsSquareAttributes) { @@ -13770,9 +13848,9 @@ TEST_F(FormatTest, GetsCorrectBasedOnStyle) { CHECK_PARSE_NESTED_BOOL_FIELD(STRUCT, FIELD, #FIELD) #define CHECK_PARSE(TEXT, FIELD, VALUE) \ - EXPECT_NE(VALUE, Style.FIELD); \ + EXPECT_NE(VALUE, Style.FIELD) << "Initial value already the same!"; \ EXPECT_EQ(0, parseConfiguration(TEXT, &Style).value()); \ - EXPECT_EQ(VALUE, Style.FIELD) + EXPECT_EQ(VALUE, Style.FIELD) << "Unexpected value after parsing!" 
TEST_F(FormatTest, ParsesConfigurationBools) { FormatStyle Style = {}; @@ -14162,6 +14240,12 @@ TEST_F(FormatTest, ParsesConfiguration) { CHECK_PARSE("ForEachMacros: [BOOST_FOREACH, Q_FOREACH]", ForEachMacros, BoostAndQForeach); + Style.AttributeMacros.clear(); + CHECK_PARSE("BasedOnStyle: LLVM", AttributeMacros, + std::vector{"__capability"}); + CHECK_PARSE("AttributeMacros: [attr1, attr2]", AttributeMacros, + std::vector({"attr1", "attr2"})); + Style.StatementMacros.clear(); CHECK_PARSE("StatementMacros: [QUNUSED]", StatementMacros, std::vector{"QUNUSED"}); From 8aa3b8da5db2ae73bf536b630915eb9f0ddc15cb Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 7 Sep 2020 09:26:16 +0100 Subject: [PATCH 314/465] [clang-format] Handle typename macros inside cast expressions Before: x = (STACK_OF(uint64_t)) & a; After: x = (STACK_OF(uint64_t))&a; Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D86930 --- clang/lib/Format/FormatToken.h | 1 + clang/lib/Format/TokenAnnotator.cpp | 12 +++++++++--- clang/unittests/Format/FormatTest.cpp | 2 ++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 795c268896294..a9aeef5e9e52f 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -102,6 +102,7 @@ namespace format { TYPE(TrailingReturnArrow) \ TYPE(TrailingUnaryOperator) \ TYPE(TypenameMacro) \ + TYPE(TypenameMacroParen) \ TYPE(UnaryOperator) \ TYPE(UntouchableMacroFunc) \ TYPE(CSharpStringLiteral) \ diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index fc6a226dc4a12..097843bdca84d 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -244,6 +244,8 @@ class AnnotatingParser { Contexts.back().IsExpression = false; } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { Left->setType(TT_AttributeParen); + } else if (Left->Previous && 
Left->Previous->is(TT_TypenameMacro)) { + Left->setType(TT_TypenameMacroParen); } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) { // The first argument to a foreach macro is a declaration. Contexts.back().IsForEachMacro = true; @@ -335,6 +337,8 @@ class AnnotatingParser { if (Left->is(TT_AttributeParen)) CurrentToken->setType(TT_AttributeParen); + if (Left->is(TT_TypenameMacroParen)) + CurrentToken->setType(TT_TypenameMacroParen); if (Left->Previous && Left->Previous->is(TT_JavaAnnotation)) CurrentToken->setType(TT_JavaAnnotation); if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation)) @@ -1855,9 +1859,11 @@ class AnnotatingParser { } return T && T->is(TT_PointerOrReference); }; - bool ParensAreType = !Tok.Previous || Tok.Previous->is(TT_TemplateCloser) || - Tok.Previous->isSimpleTypeSpecifier() || - IsQualifiedPointerOrReference(Tok.Previous); + bool ParensAreType = + !Tok.Previous || + Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypenameMacroParen) || + Tok.Previous->isSimpleTypeSpecifier() || + IsQualifiedPointerOrReference(Tok.Previous); bool ParensCouldEndDecl = Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater); if (ParensAreType && !ParensCouldEndDecl) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index f224ab03271d9..be68da6f2ef6e 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -16557,6 +16557,8 @@ TEST_F(FormatTest, TypenameMacros) { Macros.PointerAlignment = FormatStyle::PAS_Left; verifyFormat("STACK_OF(int)* a;", Macros); verifyFormat("STACK_OF(int*)* a;", Macros); + verifyFormat("x = (STACK_OF(uint64_t))*a;", Macros); + verifyFormat("x = (STACK_OF(uint64_t))&a;", Macros); } TEST_F(FormatTest, AmbersandInLamda) { From cd01eec14bc045a8616604cadf94dba025090ba5 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 7 Sep 2020 09:26:47 +0100 Subject: [PATCH 315/465] [clang-format] Check that */& after typename 
macros are pointers/references Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D86950 --- clang/unittests/Format/FormatTest.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index be68da6f2ef6e..978c22c6ee69a 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -8041,6 +8041,14 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("vector v;"); verifyFormat("vector v;"); verifyFormat("vector v;"); + FormatStyle TypeMacros = getLLVMStyle(); + TypeMacros.TypenameMacros = {"LIST"}; + verifyFormat("vector v;", TypeMacros); + verifyFormat("vector v;", TypeMacros); + verifyFormat("vector v;", TypeMacros); + verifyFormat("vector v;", TypeMacros); + verifyFormat("vector v;", TypeMacros); // multiplication + FormatStyle CustomQualifier = getLLVMStyle(); // Add indentifers that should not be parsed as a qualifier by default. 
CustomQualifier.AttributeMacros.push_back("__my_qualifier"); @@ -8105,6 +8113,9 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { // a type declaration: verifyFormat("MACRO(A *__my_qualifier);", CustomQualifier); verifyFormat("void f() { MACRO(A *__my_qualifier); }", CustomQualifier); + // Also check that TypenameMacros prevents parsing it as multiplication: + verifyIndependentOfContext("MACRO(LIST(uint64_t) * a);"); // multiplication + verifyIndependentOfContext("MACRO(LIST(uint64_t) *a);", TypeMacros); // type verifyIndependentOfContext("MACRO('0' <= c && c <= '9');"); verifyFormat("void f() { f(float{1}, a * a); }"); @@ -16553,12 +16564,15 @@ TEST_F(FormatTest, TypenameMacros) { verifyFormat("STACK_OF(LIST(int)) a, b;", Macros); verifyFormat("for (LIST(int) *a = NULL; a;) {\n}", Macros); verifyFormat("STACK_OF(int) f(LIST(int) *arg);", Macros); + verifyFormat("vector x;", Macros); + verifyFormat("vector f(LIST(uint64_t) *arg);", Macros); Macros.PointerAlignment = FormatStyle::PAS_Left; verifyFormat("STACK_OF(int)* a;", Macros); verifyFormat("STACK_OF(int*)* a;", Macros); verifyFormat("x = (STACK_OF(uint64_t))*a;", Macros); verifyFormat("x = (STACK_OF(uint64_t))&a;", Macros); + verifyFormat("vector x;", Macros); } TEST_F(FormatTest, AmbersandInLamda) { From 56fa7d1dc6a8d23111ff84171036f333cf9cddf2 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Tue, 1 Sep 2020 18:09:07 +0100 Subject: [PATCH 316/465] [clang-format] Fix formatting of _Atomic() qualifier Before: _Atomic(uint64_t) * a; After: _Atomic(uint64_t) *a; This treats _Atomic the same as the the TypenameMacros and decltype. It also allows some cleanup by removing checks whether the token before a paren is kw_decltype and instead checking for TT_TypeDeclarationParen. While touching this code also extend the decltype test cases to also check for typeof() and _Atomic(T). 
Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D86959 --- clang/lib/Format/FormatToken.cpp | 1 + clang/lib/Format/FormatToken.h | 3 +- clang/lib/Format/TokenAnnotator.cpp | 54 +++++++++++----------- clang/unittests/Format/FormatTest.cpp | 65 +++++++++++++++++++++++++++ 4 files changed, 94 insertions(+), 29 deletions(-) diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 4bc865b043fd2..8e4994f4c0d57 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -62,6 +62,7 @@ bool FormatToken::isSimpleTypeSpecifier() const { case tok::kw_char32_t: case tok::kw_typeof: case tok::kw_decltype: + case tok::kw__Atomic: return true; default: return false; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index a9aeef5e9e52f..8253bf18fc667 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -101,8 +101,8 @@ namespace format { TYPE(TrailingAnnotation) \ TYPE(TrailingReturnArrow) \ TYPE(TrailingUnaryOperator) \ + TYPE(TypeDeclarationParen) \ TYPE(TypenameMacro) \ - TYPE(TypenameMacroParen) \ TYPE(UnaryOperator) \ TYPE(UntouchableMacroFunc) \ TYPE(CSharpStringLiteral) \ @@ -526,6 +526,7 @@ struct FormatToken { case tok::kw_decltype: case tok::kw_noexcept: case tok::kw_static_assert: + case tok::kw__Atomic: case tok::kw___attribute: return true; default: diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 097843bdca84d..0239dbd63d94e 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -185,6 +185,8 @@ class AnnotatingParser { if (!CurrentToken) return false; FormatToken *Left = CurrentToken->Previous; + FormatToken *PrevNonComment = + Left ? 
Left->getPreviousNonComment() : nullptr; Left->ParentBracket = Contexts.back().ContextKind; ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); @@ -216,9 +218,8 @@ class AnnotatingParser { // export type X = (...); Contexts.back().IsExpression = false; } else if (Left->Previous && - (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype, - tok::kw_while, tok::l_paren, - tok::comma) || + (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_while, + tok::l_paren, tok::comma) || Left->Previous->isIf() || Left->Previous->is(TT_BinaryOperator))) { // static_assert, if and while usually contain expressions. @@ -242,10 +243,15 @@ class AnnotatingParser { } else if (Contexts[Contexts.size() - 2].CaretFound) { // This is the parameter list of an ObjC block. Contexts.back().IsExpression = false; - } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { + } else if (PrevNonComment && PrevNonComment->is(tok::kw___attribute)) { Left->setType(TT_AttributeParen); - } else if (Left->Previous && Left->Previous->is(TT_TypenameMacro)) { - Left->setType(TT_TypenameMacroParen); + } else if (PrevNonComment && + PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype, + tok::kw_typeof, tok::kw__Atomic)) { + Left->setType(TT_TypeDeclarationParen); + // decltype() and typeof() usually contain expressions. + if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof)) + Contexts.back().IsExpression = true; } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) { // The first argument to a foreach macro is a declaration. 
Contexts.back().IsForEachMacro = true; @@ -337,8 +343,8 @@ class AnnotatingParser { if (Left->is(TT_AttributeParen)) CurrentToken->setType(TT_AttributeParen); - if (Left->is(TT_TypenameMacroParen)) - CurrentToken->setType(TT_TypenameMacroParen); + if (Left->is(TT_TypeDeclarationParen)) + CurrentToken->setType(TT_TypeDeclarationParen); if (Left->Previous && Left->Previous->is(TT_JavaAnnotation)) CurrentToken->setType(TT_JavaAnnotation); if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation)) @@ -944,9 +950,9 @@ class AnnotatingParser { return false; if (Line.MustBeDeclaration && Contexts.size() == 1 && !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) && - (!Tok->Previous || - !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute, - TT_LeadingJavaAnnotation))) + !Tok->is(TT_TypeDeclarationParen) && + (!Tok->Previous || !Tok->Previous->isOneOf(tok::kw___attribute, + TT_LeadingJavaAnnotation))) Line.MightBeFunctionDecl = true; break; case tok::l_square: @@ -1758,9 +1764,8 @@ class AnnotatingParser { PreviousNotConst->MatchingParen->Previous->isNot(tok::period) && PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); - if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen && - PreviousNotConst->MatchingParen->Previous && - PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype)) + if (PreviousNotConst->is(tok::r_paren) && + PreviousNotConst->is(TT_TypeDeclarationParen)) return true; return (!IsPPKeyword && @@ -1861,7 +1866,7 @@ class AnnotatingParser { }; bool ParensAreType = !Tok.Previous || - Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypenameMacroParen) || + Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) || Tok.Previous->isSimpleTypeSpecifier() || IsQualifiedPointerOrReference(Tok.Previous); bool ParensCouldEndDecl = @@ -1931,6 +1936,9 @@ class AnnotatingParser { if (PrevToken->is(tok::coloncolon)) return TT_PointerOrReference; + if (PrevToken->is(tok::r_paren) && 
PrevToken->is(TT_TypeDeclarationParen)) + return TT_PointerOrReference; + if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, tok::comma, tok::semi, tok::kw_return, tok::colon, tok::equal, tok::kw_delete, tok::kw_sizeof, @@ -1946,15 +1954,6 @@ class AnnotatingParser { if (NextToken->isOneOf(tok::comma, tok::semi)) return TT_PointerOrReference; - if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) { - FormatToken *TokenBeforeMatchingParen = - PrevToken->MatchingParen->getPreviousNonComment(); - if (TokenBeforeMatchingParen && - TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype, - TT_TypenameMacro)) - return TT_PointerOrReference; - } - if (PrevToken->Tok.isLiteral() || PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, tok::kw_false, tok::r_brace) || @@ -2848,9 +2847,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return true; FormatToken *TokenBeforeMatchingParen = Left.MatchingParen->getPreviousNonComment(); - if (!TokenBeforeMatchingParen || - !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype, - TT_TypenameMacro)) + if (!TokenBeforeMatchingParen || !Left.is(TT_TypeDeclarationParen)) return true; } return (Left.Tok.isLiteral() || @@ -3948,7 +3945,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Left.is(tok::equal) && Right.is(tok::l_brace) && !Style.Cpp11BracedListStyle) return false; - if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen)) + if (Left.is(tok::l_paren) && + Left.isOneOf(TT_AttributeParen, TT_TypeDeclarationParen)) return false; if (Left.is(tok::l_paren) && Left.Previous && (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 978c22c6ee69a..a5943847882fb 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -168,6 +168,8 @@ TEST_F(FormatTest, NestedNameSpecifiers) { 
verifyFormat("vector<::Type> v;"); verifyFormat("::ns::SomeFunction(::ns::SomeOtherFunction())"); verifyFormat("static constexpr bool Bar = decltype(bar())::value;"); + verifyFormat("static constexpr bool Bar = typeof(bar())::value;"); + verifyFormat("static constexpr bool Bar = _Atomic(bar())::value;"); verifyFormat("bool a = 2 < ::SomeFunction();"); verifyFormat("ALWAYS_INLINE ::std::string getName();"); verifyFormat("some::string getName();"); @@ -7904,7 +7906,10 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("auto PointerBinding = [](const char *S) {};"); verifyFormat("typedef typeof(int(int, int)) *MyFunc;"); verifyFormat("[](const decltype(*a) &value) {}"); + verifyFormat("[](const typeof(*a) &value) {}"); + verifyFormat("[](const _Atomic(a *) &value) {}"); verifyFormat("decltype(a * b) F();"); + verifyFormat("typeof(a * b) F();"); verifyFormat("#define MACRO() [](A *a) { return 1; }"); verifyFormat("Constructor() : member([](A *a, B *b) {}) {}"); verifyIndependentOfContext("typedef void (*f)(int *a);"); @@ -7970,6 +7975,8 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("delete *x;", Left); verifyFormat("typedef typeof(int(int, int))* MyFuncPtr;", Left); verifyFormat("[](const decltype(*a)* ptr) {}", Left); + verifyFormat("[](const typeof(*a)* ptr) {}", Left); + verifyFormat("[](const _Atomic(a*)* ptr) {}", Left); verifyFormat("typedef typeof /*comment*/ (int(int, int))* MyFuncPtr;", Left); verifyFormat("auto x(A&&, B&&, C&&) -> D;", Left); verifyFormat("auto x = [](A&&, B&&, C&&) -> D {};", Left); @@ -8066,6 +8073,8 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("foo();"); verifyFormat("foo();"); verifyFormat("decltype(*::std::declval()) void F();"); + verifyFormat("typeof(*::std::declval()) void F();"); + verifyFormat("_Atomic(*::std::declval()) void F();"); verifyFormat( "template ::value &&\n" @@ -8089,6 +8098,9 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { 
verifyIndependentOfContext("MACRO(int *i);"); verifyIndependentOfContext("MACRO(auto *a);"); verifyIndependentOfContext("MACRO(const A *a);"); + verifyIndependentOfContext("MACRO(_Atomic(A) *a);"); + verifyIndependentOfContext("MACRO(decltype(A) *a);"); + verifyIndependentOfContext("MACRO(typeof(A) *a);"); verifyIndependentOfContext("MACRO(A *const a);"); verifyIndependentOfContext("MACRO(A *restrict a);"); verifyIndependentOfContext("MACRO(A *__restrict__ a);"); @@ -8639,6 +8651,10 @@ TEST_F(FormatTest, BreaksLongDeclarations) { "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); verifyFormat("decltype(LoooooooooooooooooooooooooooooooooooooooongName)\n" "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); + verifyFormat("typeof(LoooooooooooooooooooooooooooooooooooooooooongName)\n" + "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); + verifyFormat("_Atomic(LooooooooooooooooooooooooooooooooooooooooongName)\n" + "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); verifyFormat("LoooooooooooooooooooooooooooooooooooooooongReturnType\n" "LooooooooooooooooooooooooooongFunctionDeclaration(T... 
t);"); verifyFormat("LoooooooooooooooooooooooooooooooooooooooongReturnType\n" @@ -8988,6 +9004,8 @@ TEST_F(FormatTest, LayoutCxx11BraceInitializers) { verifyFormat("int foo(int i) { return fo1{}(i); }"); verifyFormat("int foo(int i) { return fo1{}(i); }"); verifyFormat("auto i = decltype(x){};"); + verifyFormat("auto i = typeof(x){};"); + verifyFormat("auto i = _Atomic(x){};"); verifyFormat("std::vector v = {1, 0 /* comment */};"); verifyFormat("Node n{1, Node{1000}, //\n" " 2};"); @@ -11580,6 +11598,8 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { verifyFormat("auto i = std::make_unique(5);", NoSpace); verifyFormat("size_t x = sizeof(x);", NoSpace); verifyFormat("auto f(int x) -> decltype(x);", NoSpace); + verifyFormat("auto f(int x) -> typeof(x);", NoSpace); + verifyFormat("auto f(int x) -> _Atomic(x);", NoSpace); verifyFormat("int f(T x) noexcept(x.create());", NoSpace); verifyFormat("alignas(128) char a[128];", NoSpace); verifyFormat("size_t x = alignof(MyType);", NoSpace); @@ -11628,6 +11648,8 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { verifyFormat("auto i = std::make_unique (5);", Space); verifyFormat("size_t x = sizeof (x);", Space); verifyFormat("auto f (int x) -> decltype (x);", Space); + verifyFormat("auto f (int x) -> typeof (x);", Space); + verifyFormat("auto f (int x) -> _Atomic (x);", Space); verifyFormat("int f (T x) noexcept (x.create ());", Space); verifyFormat("alignas (128) char a[128];", Space); verifyFormat("size_t x = alignof (MyType);", Space); @@ -11680,6 +11702,8 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { verifyFormat("auto i = std::make_unique (5);", SomeSpace); verifyFormat("size_t x = sizeof (x);", SomeSpace); verifyFormat("auto f (int x) -> decltype (x);", SomeSpace); + verifyFormat("auto f (int x) -> typeof (x);", SomeSpace); + verifyFormat("auto f (int x) -> _Atomic (x);", SomeSpace); verifyFormat("int f (T x) noexcept (x.create());", SomeSpace); verifyFormat("alignas (128) char a[128];", SomeSpace); 
verifyFormat("size_t x = alignof (MyType);", SomeSpace); @@ -14934,6 +14958,8 @@ TEST_F(FormatTest, FormatsLambdas) { "});")); verifyFormat("void f() {\n" " SomeFunction([](decltype(x), A *a) {});\n" + " SomeFunction([](typeof(x), A *a) {});\n" + " SomeFunction([](_Atomic(x), A *a) {});\n" "}"); verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" " [](const aaaaaaaaaa &a) { return a; });"); @@ -16575,6 +16601,45 @@ TEST_F(FormatTest, TypenameMacros) { verifyFormat("vector x;", Macros); } +TEST_F(FormatTest, AtomicQualifier) { + // Check that we treate _Atomic as a type and not a function call + FormatStyle Google = getGoogleStyleWithColumns(0); + verifyFormat("struct foo {\n" + " int a1;\n" + " _Atomic(a) a2;\n" + " _Atomic(_Atomic(int) *const) a3;\n" + "};", + Google); + verifyFormat("_Atomic(uint64_t) a;"); + verifyFormat("_Atomic(uint64_t) *a;"); + verifyFormat("_Atomic(uint64_t const *) *a;"); + verifyFormat("_Atomic(uint64_t *const) *a;"); + verifyFormat("_Atomic(const uint64_t *) *a;"); + verifyFormat("_Atomic(uint64_t) a;"); + verifyFormat("_Atomic(_Atomic(uint64_t)) a;"); + verifyFormat("_Atomic(_Atomic(uint64_t)) a, b;"); + verifyFormat("for (_Atomic(uint64_t) *a = NULL; a;) {\n}"); + verifyFormat("_Atomic(uint64_t) f(_Atomic(uint64_t) *arg);"); + + verifyFormat("_Atomic(uint64_t) *s(InitValue);"); + verifyFormat("_Atomic(uint64_t) *s{InitValue};"); + FormatStyle Style = getLLVMStyle(); + Style.PointerAlignment = FormatStyle::PAS_Left; + verifyFormat("_Atomic(uint64_t)* s(InitValue);", Style); + verifyFormat("_Atomic(uint64_t)* s{InitValue};", Style); + verifyFormat("_Atomic(int)* a;", Style); + verifyFormat("_Atomic(int*)* a;", Style); + verifyFormat("vector<_Atomic(uint64_t)* attr> x;", Style); + + Style.SpacesInCStyleCastParentheses = true; + Style.SpacesInParentheses = false; + verifyFormat("x = ( _Atomic(uint64_t) )*a;", Style); + Style.SpacesInCStyleCastParentheses = false; + Style.SpacesInParentheses = true; + 
verifyFormat("x = (_Atomic( uint64_t ))*a;", Style); + verifyFormat("x = (_Atomic( uint64_t ))&a;", Style); +} + TEST_F(FormatTest, AmbersandInLamda) { // Test case reported in https://bugs.llvm.org/show_bug.cgi?id=41899 FormatStyle AlignStyle = getLLVMStyle(); From 9a22eba15091ea849fa78c09ac4c9f7260071790 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 7 Sep 2020 09:29:40 +0100 Subject: [PATCH 317/465] [clang-format] Parse __underlying_type(T) as a type Before: MACRO(__underlying_type(A) * a); After: MACRO(__underlying_type(A) *a); Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D86960 --- clang/lib/Format/FormatToken.h | 1 + clang/lib/Format/TokenAnnotator.cpp | 3 ++- clang/unittests/Format/FormatTest.cpp | 11 +++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 8253bf18fc667..76ef99e72d58e 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -528,6 +528,7 @@ struct FormatToken { case tok::kw_static_assert: case tok::kw__Atomic: case tok::kw___attribute: + case tok::kw___underlying_type: return true; default: return false; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 0239dbd63d94e..4867f9e3d6c1a 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -247,7 +247,8 @@ class AnnotatingParser { Left->setType(TT_AttributeParen); } else if (PrevNonComment && PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype, - tok::kw_typeof, tok::kw__Atomic)) { + tok::kw_typeof, tok::kw__Atomic, + tok::kw___underlying_type)) { Left->setType(TT_TypeDeclarationParen); // decltype() and typeof() usually contain expressions. 
if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof)) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index a5943847882fb..b1d46a27ef43a 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -169,6 +169,7 @@ TEST_F(FormatTest, NestedNameSpecifiers) { verifyFormat("::ns::SomeFunction(::ns::SomeOtherFunction())"); verifyFormat("static constexpr bool Bar = decltype(bar())::value;"); verifyFormat("static constexpr bool Bar = typeof(bar())::value;"); + verifyFormat("static constexpr bool Bar = __underlying_type(bar())::value;"); verifyFormat("static constexpr bool Bar = _Atomic(bar())::value;"); verifyFormat("bool a = 2 < ::SomeFunction();"); verifyFormat("ALWAYS_INLINE ::std::string getName();"); @@ -7908,6 +7909,7 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("[](const decltype(*a) &value) {}"); verifyFormat("[](const typeof(*a) &value) {}"); verifyFormat("[](const _Atomic(a *) &value) {}"); + verifyFormat("[](const __underlying_type(a) &value) {}"); verifyFormat("decltype(a * b) F();"); verifyFormat("typeof(a * b) F();"); verifyFormat("#define MACRO() [](A *a) { return 1; }"); @@ -7977,6 +7979,7 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("[](const decltype(*a)* ptr) {}", Left); verifyFormat("[](const typeof(*a)* ptr) {}", Left); verifyFormat("[](const _Atomic(a*)* ptr) {}", Left); + verifyFormat("[](const __underlying_type(a)* ptr) {}", Left); verifyFormat("typedef typeof /*comment*/ (int(int, int))* MyFuncPtr;", Left); verifyFormat("auto x(A&&, B&&, C&&) -> D;", Left); verifyFormat("auto x = [](A&&, B&&, C&&) -> D {};", Left); @@ -8075,6 +8078,7 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyFormat("decltype(*::std::declval()) void F();"); verifyFormat("typeof(*::std::declval()) void F();"); verifyFormat("_Atomic(*::std::declval()) void F();"); + verifyFormat("__underlying_type(*::std::declval()) void F();"); 
verifyFormat( "template ::value &&\n" @@ -8101,6 +8105,7 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) { verifyIndependentOfContext("MACRO(_Atomic(A) *a);"); verifyIndependentOfContext("MACRO(decltype(A) *a);"); verifyIndependentOfContext("MACRO(typeof(A) *a);"); + verifyIndependentOfContext("MACRO(__underlying_type(A) *a);"); verifyIndependentOfContext("MACRO(A *const a);"); verifyIndependentOfContext("MACRO(A *restrict a);"); verifyIndependentOfContext("MACRO(A *__restrict__ a);"); @@ -8655,6 +8660,8 @@ TEST_F(FormatTest, BreaksLongDeclarations) { "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); verifyFormat("_Atomic(LooooooooooooooooooooooooooooooooooooooooongName)\n" "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); + verifyFormat("__underlying_type(LooooooooooooooooooooooooooooooongName)\n" + "LooooooooooooooooooooooooooooooooooongFunctionDefinition() {}"); verifyFormat("LoooooooooooooooooooooooooooooooooooooooongReturnType\n" "LooooooooooooooooooooooooooongFunctionDeclaration(T... 
t);"); verifyFormat("LoooooooooooooooooooooooooooooooooooooooongReturnType\n" @@ -11600,6 +11607,7 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { verifyFormat("auto f(int x) -> decltype(x);", NoSpace); verifyFormat("auto f(int x) -> typeof(x);", NoSpace); verifyFormat("auto f(int x) -> _Atomic(x);", NoSpace); + verifyFormat("auto f(int x) -> __underlying_type(x);", NoSpace); verifyFormat("int f(T x) noexcept(x.create());", NoSpace); verifyFormat("alignas(128) char a[128];", NoSpace); verifyFormat("size_t x = alignof(MyType);", NoSpace); @@ -11650,6 +11658,7 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { verifyFormat("auto f (int x) -> decltype (x);", Space); verifyFormat("auto f (int x) -> typeof (x);", Space); verifyFormat("auto f (int x) -> _Atomic (x);", Space); + verifyFormat("auto f (int x) -> __underlying_type (x);", Space); verifyFormat("int f (T x) noexcept (x.create ());", Space); verifyFormat("alignas (128) char a[128];", Space); verifyFormat("size_t x = alignof (MyType);", Space); @@ -11704,6 +11713,7 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) { verifyFormat("auto f (int x) -> decltype (x);", SomeSpace); verifyFormat("auto f (int x) -> typeof (x);", SomeSpace); verifyFormat("auto f (int x) -> _Atomic (x);", SomeSpace); + verifyFormat("auto f (int x) -> __underlying_type (x);", SomeSpace); verifyFormat("int f (T x) noexcept (x.create());", SomeSpace); verifyFormat("alignas (128) char a[128];", SomeSpace); verifyFormat("size_t x = alignof (MyType);", SomeSpace); @@ -14960,6 +14970,7 @@ TEST_F(FormatTest, FormatsLambdas) { " SomeFunction([](decltype(x), A *a) {});\n" " SomeFunction([](typeof(x), A *a) {});\n" " SomeFunction([](_Atomic(x), A *a) {});\n" + " SomeFunction([](__underlying_type(x), A *a) {});\n" "}"); verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" " [](const aaaaaaaaaa &a) { return a; });"); From 05147d33091720e2df929d6fea3b0fd2a657ac61 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: 
Mon, 7 Sep 2020 09:29:56 +0100 Subject: [PATCH 318/465] [clang-format] Correctly parse function declarations with TypenameMacros When using the always break after return type setting: Before: SomeType funcdecl(LIST(uint64_t)); After: SomeType funcdecl(LIST(uint64_t));" Reviewed By: MyDeveloperDay Differential Revision: https://reviews.llvm.org/D87007 --- clang/lib/Format/TokenAnnotator.cpp | 2 ++ clang/unittests/Format/FormatTest.cpp | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 4867f9e3d6c1a..5dd6a7a9da40b 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2400,6 +2400,8 @@ static bool isFunctionDeclarationName(const FormatToken &Current, return true; for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; Tok = Tok->Next) { + if (Tok->is(TT_TypeDeclarationParen)) + return true; if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) { Tok = Tok->MatchingParen; continue; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index b1d46a27ef43a..b198efa4af9ec 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -6681,9 +6681,12 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { Style); // All declarations and definitions should have the return type moved to its - // own - // line. + // own line. Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All; + Style.TypenameMacros = {"LIST"}; + verifyFormat("SomeType\n" + "funcdecl(LIST(uint64_t));", + Style); verifyFormat("class E {\n" " int\n" " f() {\n" From 7634c64b6121ba61a6c72c6b45e3561ad8cf345e Mon Sep 17 00:00:00 2001 From: Pushpinder Singh Date: Thu, 3 Sep 2020 07:57:46 -0400 Subject: [PATCH 319/465] [OpenMP][AMDGPU] Use DS_Max_Warp_Number instead of WARPSIZE The size of worker_rootS should have been DS_Max_Warp_Number. 
This reduces memory usage by deviceRTL on AMDGPU from around 2.3GB to around 770MB. Reviewed By: JonChesterfield, jdoerfert Differential Revision: https://reviews.llvm.org/D87084 --- openmp/libomptarget/deviceRTLs/common/omptarget.h | 2 +- openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h index 88807de4e19c7..6d5d6cd19bd6e 100644 --- a/openmp/libomptarget/deviceRTLs/common/omptarget.h +++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h @@ -252,7 +252,7 @@ class omptarget_nvptx_TeamDescr { workDescrForActiveParallel; // one, ONLY for the active par ALIGN(16) - __kmpc_data_sharing_worker_slot_static worker_rootS[WARPSIZE]; + __kmpc_data_sharing_worker_slot_static worker_rootS[DS_Max_Warp_Number]; ALIGN(16) __kmpc_data_sharing_master_slot_static master_rootS[1]; }; diff --git a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu index ca2fd1d307542..9b116aba2fc39 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu @@ -26,7 +26,7 @@ INLINE static void data_sharing_init_stack_common() { omptarget_nvptx_TeamDescr *teamDescr = &omptarget_nvptx_threadPrivateContext->TeamContext(); - for (int WID = 0; WID < WARPSIZE; WID++) { + for (int WID = 0; WID < DS_Max_Warp_Number; WID++) { __kmpc_data_sharing_slot *RootS = teamDescr->GetPreallocatedSlotAddr(WID); DataSharingState.SlotPtr[WID] = RootS; DataSharingState.StackPtr[WID] = (void *)&RootS->Data[0]; From 8d64df9f139038b48344dd9f1f20a38b22aba8c9 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Fri, 4 Sep 2020 11:43:00 -0400 Subject: [PATCH 320/465] [mlir][Vector] Revisit VectorToSCF. 
Vector to SCF conversion still had issues due to the interaction with the natural alignment derived by the LLVM data layout. One traditional workaround is to allocate aligned. However, this does not always work for vector sizes that are non-powers of 2. This revision implements a more portable mechanism where the intermediate allocation is always a memref of elemental vector type. AllocOp is extended to use the natural LLVM DataLayout alignment for non-scalar types, when the alignment is not specified in the first place. An integration test is added that exercises the transfer to scf.for + scalar lowering with a 5x5 transposition. Differential Revision: https://reviews.llvm.org/D87150 --- .../mlir/Dialect/Vector/EDSC/Intrinsics.h | 17 ++-- mlir/include/mlir/Dialect/Vector/VectorOps.td | 29 +++++-- .../Vector/CPU/test-transfer-to-loops.mlir | 81 +++++++++++++++++++ .../StandardToLLVM/StandardToLLVM.cpp | 14 +++- .../Conversion/VectorToSCF/VectorToSCF.cpp | 35 ++++---- mlir/lib/Dialect/Vector/VectorOps.cpp | 24 ++++++ .../convert-static-memref-ops.mlir | 6 +- .../VectorToSCF/vector-to-loops.mlir | 31 +++---- mlir/test/EDSC/builder-api-test.cpp | 2 +- 9 files changed, 186 insertions(+), 53 deletions(-) create mode 100644 mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir diff --git a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h index f353262750345..269d9c1b27af0 100644 --- a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h @@ -16,10 +16,16 @@ namespace intrinsics { using vector_broadcast = ValueBuilder; using vector_contract = ValueBuilder; -using vector_insert = ValueBuilder; -using vector_fma = ValueBuilder; using vector_extract = ValueBuilder; -using vector_extractelement = ValueBuilder; +using vector_extract_element = ValueBuilder; +using vector_extract_slices = ValueBuilder; +using vector_extract_strided_slice = + ValueBuilder; 
+using vector_fma = ValueBuilder; +using vector_insert = ValueBuilder; +using vector_insert_element = ValueBuilder; +using vector_insert_slices = ValueBuilder; +using vector_insert_strided_slice = ValueBuilder; using vector_matmul = ValueBuilder; using vector_outerproduct = ValueBuilder; using vector_print = OperationBuilder; @@ -27,11 +33,6 @@ using vector_transfer_read = ValueBuilder; using vector_transfer_write = OperationBuilder; using vector_transpose = ValueBuilder; using vector_type_cast = ValueBuilder; -using vector_extract_slices = ValueBuilder; -using vector_insert_slices = ValueBuilder; -using vector_extract_strided_slice = - ValueBuilder; -using vector_insert_strided_slice = ValueBuilder; } // namespace intrinsics } // namespace edsc diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index 22fd036df8148..dceb850ad929c 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -348,15 +348,21 @@ def Vector_ExtractElementOp : %1 = vector.extractelement %0[%c : i32]: vector<16xf32> ``` }]; + let assemblyFormat = [{ + $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector) + }]; + + let builders = [OpBuilder< + "OpBuilder &builder, OperationState &result, Value source, " + "int64_t position">, + OpBuilder< + "OpBuilder &builder, OperationState &result, Value source, " + "Value position">]; let extraClassDeclaration = [{ VectorType getVectorType() { return vector().getType().cast(); } }]; - - let assemblyFormat = [{ - $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector) - }]; } def Vector_ExtractOp : @@ -508,6 +514,17 @@ def Vector_InsertElementOp : %1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32> ``` }]; + let assemblyFormat = [{ + $source `,` $dest `[` $position `:` type($position) `]` attr-dict `:` + type($result) + }]; + + let builders = [OpBuilder< + "OpBuilder &builder, OperationState &result, 
Value source, " + "Value dest, int64_t position">, + OpBuilder< + "OpBuilder &builder, OperationState &result, Value source, " + "Value dest, Value position">]; let extraClassDeclaration = [{ Type getSourceType() { return source().getType(); } VectorType getDestVectorType() { @@ -515,10 +532,6 @@ def Vector_InsertElementOp : } }]; - let assemblyFormat = [{ - $source `,` $dest `[` $position `:` type($position) `]` attr-dict `:` - type($result) - }]; } def Vector_InsertOp : diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir new file mode 100644 index 0000000000000..8d965779dfc6d --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir @@ -0,0 +1,81 @@ +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#map0 = affine_map<(d0, d1) -> (d1, d0)> + +func @print_memref_f32(memref<*xf32>) + +func @alloc_2d_filled_f32(%arg0: index, %arg1: index) -> memref { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c10 = constant 10 : index + %c100 = constant 100 : index + %0 = alloc(%arg0, %arg1) : memref + scf.for %arg5 = %c0 to %arg0 step %c1 { + scf.for %arg6 = %c0 to %arg1 step %c1 { + %arg66 = muli %arg6, %c100 : index + %tmp1 = addi %arg5, %arg66 : index + %tmp2 = index_cast %tmp1 : index to i32 + %tmp3 = sitofp %tmp2 : i32 to f32 + store %tmp3, %0[%arg5, %arg6] : memref + } + } + return %0 : memref +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %c6 = constant 6 : index + %cst = constant -4.2e+01 : f32 + %0 = call @alloc_2d_filled_f32(%c6, %c6) : (index, index) -> memref + %converted = memref_cast %0 : 
memref to memref<*xf32> + call @print_memref_f32(%converted): (memref<*xf32>) -> () + // CHECK: Unranked{{.*}}data = + // CHECK: [ + // CHECK-SAME: [0, 100, 200, 300, 400, 500], + // CHECK-NEXT: [1, 101, 201, 301, 401, 501], + // CHECK-NEXT: [2, 102, 202, 302, 402, 502], + // CHECK-NEXT: [3, 103, 203, 303, 403, 503], + // CHECK-NEXT: [4, 104, 204, 304, 404, 504], + // CHECK-NEXT: [5, 105, 205, 305, 405, 505]] + + %init = vector.transfer_read %0[%c1, %c1], %cst : memref, vector<5x5xf32> + vector.print %init : vector<5x5xf32> + // 5x5 block rooted at {1, 1} + // CHECK-NEXT: ( ( 101, 201, 301, 401, 501 ), + // CHECK-SAME: ( 102, 202, 302, 402, 502 ), + // CHECK-SAME: ( 103, 203, 303, 403, 503 ), + // CHECK-SAME: ( 104, 204, 304, 404, 504 ), + // CHECK-SAME: ( 105, 205, 305, 405, 505 ) ) + + %1 = vector.transfer_read %0[%c1, %c1], %cst {permutation_map = #map0} : memref, vector<5x5xf32> + vector.print %1 : vector<5x5xf32> + // Transposed 5x5 block rooted @{1, 1} in memory. + // CHECK-NEXT: ( ( 101, 102, 103, 104, 105 ), + // CHECK-SAME: ( 201, 202, 203, 204, 205 ), + // CHECK-SAME: ( 301, 302, 303, 304, 305 ), + // CHECK-SAME: ( 401, 402, 403, 404, 405 ), + // CHECK-SAME: ( 501, 502, 503, 504, 505 ) ) + + // Transpose-write the transposed 5x5 block @{0, 0} in memory. + vector.transfer_write %1, %0[%c0, %c0] {permutation_map = #map0} : vector<5x5xf32>, memref + + %2 = vector.transfer_read %0[%c1, %c1], %cst : memref, vector<5x5xf32> + vector.print %2 : vector<5x5xf32> + // New 5x5 block rooted @{1, 1} in memory. + // Here we expect the boundaries from the original data + // (i.e. last row: 105 .. 505, last col: 501 .. 505) + // and the 4x4 subblock 202 .. 
505 rooted @{0, 0} in the vector + // CHECK-NEXT: ( ( 202, 302, 402, 502, 501 ), + // CHECK-SAME: ( 203, 303, 403, 503, 502 ), + // CHECK-SAME: ( 204, 304, 404, 504, 503 ), + // CHECK-SAME: ( 205, 305, 405, 505, 504 ), + // CHECK-SAME: ( 105, 205, 305, 405, 505 ) ) + + dealloc %0 : memref + return +} diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 401509f1f8a60..55a926ef1423d 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -1893,11 +1893,17 @@ struct AllocLikeOpLowering : public ConvertOpToLLVMPattern { // Adjust the allocation size to consider alignment. if (Optional alignment = allocOp.alignment()) { accessAlignment = createIndexConstant(rewriter, loc, *alignment); - cumulativeSize = rewriter.create( - loc, - rewriter.create(loc, cumulativeSize, accessAlignment), - one); + } else if (!memRefType.getElementType().isSignlessIntOrIndexOrFloat()) { + // In the case where no alignment is specified, we may want to override + // `malloc's` behavior. `malloc` typically aligns at the size of the + // biggest scalar on a target HW. For non-scalars, use the natural + // alignment of the LLVM type given by the LLVM DataLayout. 
+ accessAlignment = + this->getSizeInBytes(loc, memRefType.getElementType(), rewriter); } + if (accessAlignment) + cumulativeSize = + rewriter.create(loc, cumulativeSize, accessAlignment); callArgs.push_back(cumulativeSize); } auto allocFuncSymbol = rewriter.getSymbolRefAttr(allocFunc); diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 3c501f046f074..8f7d43829846b 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -35,8 +35,6 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/Passes.h" -#define ALIGNMENT_SIZE 128 - using namespace mlir; using namespace mlir::edsc; using namespace mlir::edsc::intrinsics; @@ -234,8 +232,7 @@ static Value setAllocAtFunctionEntry(MemRefType memRefMinorVectorType, op->getParentWithTrait(); assert(scope && "Expected op to be inside automatic allocation scope"); b.setInsertionPointToStart(&scope->getRegion(0).front()); - Value res = std_alloca(memRefMinorVectorType, ValueRange{}, - b.getI64IntegerAttr(ALIGNMENT_SIZE)); + Value res = std_alloca(memRefMinorVectorType); return res; } @@ -494,8 +491,10 @@ template MemRefType VectorTransferRewriter::tmpMemRefType( TransferOpTy transfer) const { auto vectorType = transfer.getVectorType(); - return MemRefType::get(vectorType.getShape(), vectorType.getElementType(), {}, - 0); + return MemRefType::get(vectorType.getShape().drop_back(), + VectorType::get(vectorType.getShape().take_back(), + vectorType.getElementType()), + {}, 0); } /// Lowers TransferReadOp into a combination of: @@ -585,8 +584,7 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( steps.push_back(std_constant_index(step)); // 2. Emit alloc-copy-load-dealloc. 
- Value tmp = std_alloc(tmpMemRefType(transfer), ValueRange{}, - rewriter.getI64IntegerAttr(ALIGNMENT_SIZE)); + Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); StdIndexedValue local(tmp); Value vec = vector_type_cast(tmp); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { @@ -595,10 +593,15 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( if (coalescedIdx >= 0) std::swap(ivs.back(), ivs[coalescedIdx]); // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). - local(ivs) = remote(clip(transfer, memRefBoundsCapture, ivs)); + SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); + ArrayRef indicesRef(indices), ivsRef(ivs); + Value pos = + std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back()); + Value vector = vector_insert_element(remote(indicesRef), + local(ivsRef.drop_back()), pos); + local(ivsRef.drop_back()) = vector; }); Value vectorValue = std_load(vec); - (std_dealloc(tmp)); // vexing parse // 3. Propagate. rewriter.replaceOp(op, vectorValue); @@ -667,8 +670,7 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( steps.push_back(std_constant_index(step)); // 2. Emit alloc-store-copy-dealloc. - Value tmp = std_alloc(tmpMemRefType(transfer), ValueRange{}, - rewriter.getI64IntegerAttr(ALIGNMENT_SIZE)); + Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); StdIndexedValue local(tmp); Value vec = vector_type_cast(tmp); std_store(vectorValue, vec); @@ -678,10 +680,15 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( if (coalescedIdx >= 0) std::swap(ivs.back(), ivs[coalescedIdx]); // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). 
- remote(clip(transfer, memRefBoundsCapture, ivs)) = local(ivs); + SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); + ArrayRef indicesRef(indices), ivsRef(ivs); + Value pos = + std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back()); + Value scalar = vector_extract_element(local(ivsRef.drop_back()), pos); + remote(indices) = scalar; }); - (std_dealloc(tmp)); // vexing parse... + // 3. Erase. rewriter.eraseOp(op); return success(); } diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 7fa62ea34de19..d00e56297532c 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -537,6 +537,18 @@ Optional> ContractionOp::getShapeForUnroll() { // ExtractElementOp //===----------------------------------------------------------------------===// +void vector::ExtractElementOp::build(OpBuilder &builder, OperationState &result, + Value source, Value position) { + result.addOperands({source, position}); + result.addTypes(source.getType().cast().getElementType()); +} + +void vector::ExtractElementOp::build(OpBuilder &builder, OperationState &result, + Value source, int64_t position) { + Value pos = builder.create(result.location, position, 32); + build(builder, result, source, pos); +} + static LogicalResult verify(vector::ExtractElementOp op) { VectorType vectorType = op.getVectorType(); if (vectorType.getRank() != 1) @@ -1007,6 +1019,18 @@ static ParseResult parseShuffleOp(OpAsmParser &parser, OperationState &result) { // InsertElementOp //===----------------------------------------------------------------------===// +void InsertElementOp::build(OpBuilder &builder, OperationState &result, + Value source, Value dest, Value position) { + result.addOperands({source, dest, position}); + result.addTypes(dest.getType()); +} + +void InsertElementOp::build(OpBuilder &builder, OperationState &result, + Value source, Value dest, int64_t position) { + Value pos = 
builder.create(result.location, position, 32); + build(builder, result, source, dest, pos); +} + static LogicalResult verify(InsertElementOp op) { auto dstVectorType = op.getDestVectorType(); if (dstVectorType.getRank() != 1) diff --git a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir index b428d37a36167..5cccca3795b3b 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir @@ -130,8 +130,7 @@ func @aligned_1d_alloc() -> memref<42xf32> { // CHECK-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 // CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64 -// CHECK-NEXT: %[[alignmentMinus1:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 -// CHECK-NEXT: %[[allocsize:.*]] = llvm.sub %[[alignmentMinus1]], %[[one_1]] : !llvm.i64 +// CHECK-NEXT: %[[allocsize:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 // CHECK-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm.ptr // CHECK-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> @@ -154,8 +153,7 @@ func @aligned_1d_alloc() -> memref<42xf32> { // BAREPTR-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 // BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // BAREPTR-NEXT: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[alignmentMinus1:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 -// BAREPTR-NEXT: %[[allocsize:.*]] = llvm.sub %[[alignmentMinus1]], %[[one_1]] : !llvm.i64 +// BAREPTR-NEXT: %[[allocsize:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 // BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm.ptr // 
BAREPTR-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr // BAREPTR-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir index 986bfe1763515..1a8d1a68a126c 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -19,7 +19,7 @@ func @materialize_read_1d() { // CHECK: %[[FILTERED1:.*]] = select // CHECK: {{.*}} = select // CHECK: %[[FILTERED2:.*]] = select - // CHECK-NEXT: %{{.*}} = load {{.*}}[%[[FILTERED1]], %[[FILTERED2]]] : memref<7x42xf32> + // CHECK: %{{.*}} = load {{.*}}[%[[FILTERED1]], %[[FILTERED2]]] : memref<7x42xf32> } } return @@ -58,6 +58,7 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_read(%M: index, %N: index, %O: index, %P: index) { %f0 = constant 0.0: f32 + // CHECK-DAG: %[[ALLOC:.*]] = alloca() : memref<5x4xvector<3xf32>> // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C3:.*]] = constant 3 : index @@ -68,7 +69,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 { - // CHECK: %[[ALLOC:.*]] = alloc() {alignment = 128 : i64} : memref<5x4x3xf32> // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { @@ -97,13 +97,15 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: {{.*}} = select // CHECK-NEXT: {{.*}} = cmpi "slt", 
{{.*}}, %[[C0]] : index // CHECK-NEXT: %[[L3:.*]] = select + // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] // // CHECK-NEXT: {{.*}} = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref - // CHECK-NEXT: store {{.*}}, %[[ALLOC]][%[[I6]], %[[I5]], %[[I4]]] : memref<5x4x3xf32> + // CHECK-NEXT: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %25, %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } - // CHECK-NEXT: dealloc %[[ALLOC]] : memref<5x4x3xf32> // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -134,6 +136,7 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_write(%M: index, %N: index, %O: index, %P: index) { + // CHECK-DAG: %[[ALLOC:.*]] = alloca() : memref<5x4xvector<3xf32>> // CHECK-DAG: %{{.*}} = constant dense<1.000000e+00> : vector<5x4x3xf32> // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index @@ -145,8 +148,7 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 { // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 { - // CHECK: %[[ALLOC:.*]] = alloc() {alignment = 128 : i64} : memref<5x4x3xf32> - // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4x3xf32> + // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4xvector<3xf32>> // CHECK: store %{{.*}}, {{.*}} : memref> // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { @@ -177,13 +179,14 @@ func @materialize_write(%M: index, %N: index, %O: 
index, %P: index) { // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index // CHECK-NEXT: %[[S3:.*]] = select {{.*}}, %[[C0]], {{.*}} : index + // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] // - // CHECK-NEXT: {{.*}} = load {{.*}}[%[[I6]], %[[I5]], %[[I4]]] : memref<5x4x3xf32> - // CHECK: store {{.*}}, {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref + // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // CHECK: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } - // CHECK-NEXT: dealloc {{.*}} : memref<5x4x3xf32> // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -232,7 +235,7 @@ func @transfer_read_progressive(%A : memref, %base: index) -> vector<3x %f7 = constant 7.0: f32 // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32> - // CHECK-DAG: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>> + // CHECK-DAG: %[[alloc:.*]] = alloca() : memref<3xvector<15xf32>> // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[dim:.*]] = dim %[[A]], %[[C0]] : memref // CHECK: affine.for %[[I:.*]] = 0 to 3 { @@ -307,7 +310,7 @@ func @transfer_read_progressive(%A : memref, %base: index) -> vector<3x // FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32> func @transfer_write_progressive(%A : memref, %base: index, %vec: vector<3x15xf32>) { // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>> + // CHECK: %[[alloc:.*]] = alloca() : memref<3xvector<15xf32>> // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref> // CHECK: store %[[vec]], %[[vmemref]][] : memref> // CHECK: %[[dim:.*]] = dim %[[A]], %[[C0]] : memref @@ -363,7 +366,7 @@ func 
@transfer_write_progressive(%A : memref, %base: index, %vec: vecto // FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32> func @transfer_write_progressive_unmasked(%A : memref, %base: index, %vec: vector<3x15xf32>) { // CHECK-NOT: scf.if - // CHECK-NEXT: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>> + // CHECK-NEXT: %[[alloc:.*]] = alloca() : memref<3xvector<15xf32>> // CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref> // CHECK-NEXT: store %[[vec]], %[[vmemref]][] : memref> // CHECK-NEXT: affine.for %[[I:.*]] = 0 to 3 { @@ -416,7 +419,7 @@ func @transfer_read_minor_identity(%A : memref) -> vector<3x3xf32> // CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32 // CHECK: %[[c2:.*]] = constant 2 : index // CHECK: %[[cst0:.*]] = constant dense<0.000000e+00> : vector<3xf32> -// CHECK: %[[m:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<3xf32>> +// CHECK: %[[m:.*]] = alloca() : memref<3xvector<3xf32>> // CHECK: %[[d:.*]] = dim %[[A]], %[[c2]] : memref // CHECK: affine.for %[[arg1:.*]] = 0 to 3 { // CHECK: %[[cmp:.*]] = cmpi "slt", %[[arg1]], %[[d]] : index @@ -445,7 +448,7 @@ func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref) // CHECK: %[[c0:.*]] = constant 0 : index // CHECK: %[[c2:.*]] = constant 2 : index -// CHECK: %[[m:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<3xf32>> +// CHECK: %[[m:.*]] = alloca() : memref<3xvector<3xf32>> // CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<3xvector<3xf32>> to memref> // CHECK: store %[[A]], %[[cast]][] : memref> // CHECK: %[[d:.*]] = dim %[[B]], %[[c2]] : memref diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp index 062e4b5912297..4695090dacb52 100644 --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -1089,7 +1089,7 @@ TEST_FUNC(vector_extractelement_op_i32) { ScopedContext scope(builder, f.getLoc()); auto i32Type = 
builder.getI32Type(); auto vectorType = VectorType::get(/*shape=*/{8}, i32Type); - vector_extractelement( + vector_extract_element( i32Type, std_constant(vectorType, builder.getI32VectorAttr({10})), std_constant_int(0, i32Type)); From 81aa66f65f504af18982baa078a5f3f7d2aa88fa Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Fri, 28 Aug 2020 11:52:54 +0000 Subject: [PATCH 321/465] Extract infrastructure to ignore intermediate expressions into `clang/AST/IgnoreExpr.h` Rationale: This allows users to use `IgnoreExprNodes` and `Ignore*SingleStep` outside of `clang/AST/Expr.cpp`. Minor: Rename `IgnoreImp...SingleStep` into `IgnoreImplicit...SingleStep`. Differential Revision: https://reviews.llvm.org/D86778 --- clang/include/clang/AST/IgnoreExpr.h | 61 ++++++++++++ clang/lib/AST/CMakeLists.txt | 1 + clang/lib/AST/Expr.cpp | 138 +-------------------------- clang/lib/AST/IgnoreExpr.cpp | 129 +++++++++++++++++++++++++ 4 files changed, 195 insertions(+), 134 deletions(-) create mode 100644 clang/include/clang/AST/IgnoreExpr.h create mode 100644 clang/lib/AST/IgnoreExpr.cpp diff --git a/clang/include/clang/AST/IgnoreExpr.h b/clang/include/clang/AST/IgnoreExpr.h new file mode 100644 index 0000000000000..15d31f3af9954 --- /dev/null +++ b/clang/include/clang/AST/IgnoreExpr.h @@ -0,0 +1,61 @@ +//===--- IgnoreExpr.h - Ignore intermediate Expressions -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines common functions to ignore intermediate expression nodes +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_IGNOREEXPR_H +#define LLVM_CLANG_AST_IGNOREEXPR_H + +#include "clang/AST/Expr.h" + +namespace clang { +namespace detail { +/// Given an expression E and functions Fn_1,...,Fn_n : Expr * -> Expr *, +/// Return Fn_n(...(Fn_1(E))) +inline Expr *IgnoreExprNodesImpl(Expr *E) { return E; }; +template +Expr *IgnoreExprNodesImpl(Expr *E, FnTy &&Fn, FnTys &&... Fns) { + return IgnoreExprNodesImpl(Fn(E), std::forward(Fns)...); +} +} // namespace detail + +/// Given an expression E and functions Fn_1,...,Fn_n : Expr * -> Expr *, +/// Recursively apply each of the functions to E until reaching a fixed point. +/// Note that a null E is valid; in this case nothing is done. +template Expr *IgnoreExprNodes(Expr *E, FnTys &&... 
Fns) { + Expr *LastE = nullptr; + while (E != LastE) { + LastE = E; + E = detail::IgnoreExprNodesImpl(E, std::forward(Fns)...); + } + return E; +} + +Expr *IgnoreImplicitCastsSingleStep(Expr *E); + +Expr *IgnoreImplicitCastsExtraSingleStep(Expr *E); + +Expr *IgnoreCastsSingleStep(Expr *E); + +Expr *IgnoreLValueCastsSingleStep(Expr *E); + +Expr *IgnoreBaseCastsSingleStep(Expr *E); + +Expr *IgnoreImplicitSingleStep(Expr *E); + +Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E); + +Expr *IgnoreParensOnlySingleStep(Expr *E); + +Expr *IgnoreParensSingleStep(Expr *E); + +} // namespace clang + +#endif // LLVM_CLANG_AST_IGNOREEXPR_H diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt index 35099fd0dacf8..dfd26fd97bc6d 100644 --- a/clang/lib/AST/CMakeLists.txt +++ b/clang/lib/AST/CMakeLists.txt @@ -55,6 +55,7 @@ add_clang_library(clangAST ExternalASTMerger.cpp ExternalASTSource.cpp FormatString.cpp + IgnoreExpr.cpp InheritViz.cpp Interp/ByteCodeEmitter.cpp Interp/ByteCodeExprGen.cpp diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 8efd6837c541b..1029acbf68cd1 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -21,6 +21,7 @@ #include "clang/AST/DependenceFlags.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/IgnoreExpr.h" #include "clang/AST/Mangle.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" @@ -2779,118 +2780,6 @@ QualType Expr::findBoundMemberType(const Expr *expr) { return QualType(); } -static Expr *IgnoreImpCastsSingleStep(Expr *E) { - if (auto *ICE = dyn_cast(E)) - return ICE->getSubExpr(); - - if (auto *FE = dyn_cast(E)) - return FE->getSubExpr(); - - return E; -} - -static Expr *IgnoreImpCastsExtraSingleStep(Expr *E) { - // FIXME: Skip MaterializeTemporaryExpr and SubstNonTypeTemplateParmExpr in - // addition to what IgnoreImpCasts() skips to account for the current - // behaviour of IgnoreParenImpCasts(). 
- Expr *SubE = IgnoreImpCastsSingleStep(E); - if (SubE != E) - return SubE; - - if (auto *MTE = dyn_cast(E)) - return MTE->getSubExpr(); - - if (auto *NTTP = dyn_cast(E)) - return NTTP->getReplacement(); - - return E; -} - -static Expr *IgnoreCastsSingleStep(Expr *E) { - if (auto *CE = dyn_cast(E)) - return CE->getSubExpr(); - - if (auto *FE = dyn_cast(E)) - return FE->getSubExpr(); - - if (auto *MTE = dyn_cast(E)) - return MTE->getSubExpr(); - - if (auto *NTTP = dyn_cast(E)) - return NTTP->getReplacement(); - - return E; -} - -static Expr *IgnoreLValueCastsSingleStep(Expr *E) { - // Skip what IgnoreCastsSingleStep skips, except that only - // lvalue-to-rvalue casts are skipped. - if (auto *CE = dyn_cast(E)) - if (CE->getCastKind() != CK_LValueToRValue) - return E; - - return IgnoreCastsSingleStep(E); -} - -static Expr *IgnoreBaseCastsSingleStep(Expr *E) { - if (auto *CE = dyn_cast(E)) - if (CE->getCastKind() == CK_DerivedToBase || - CE->getCastKind() == CK_UncheckedDerivedToBase || - CE->getCastKind() == CK_NoOp) - return CE->getSubExpr(); - - return E; -} - -static Expr *IgnoreImplicitSingleStep(Expr *E) { - Expr *SubE = IgnoreImpCastsSingleStep(E); - if (SubE != E) - return SubE; - - if (auto *MTE = dyn_cast(E)) - return MTE->getSubExpr(); - - if (auto *BTE = dyn_cast(E)) - return BTE->getSubExpr(); - - return E; -} - -static Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E) { - if (auto *ICE = dyn_cast(E)) - return ICE->getSubExprAsWritten(); - - return IgnoreImplicitSingleStep(E); -} - -static Expr *IgnoreParensOnlySingleStep(Expr *E) { - if (auto *PE = dyn_cast(E)) - return PE->getSubExpr(); - return E; -} - -static Expr *IgnoreParensSingleStep(Expr *E) { - if (auto *PE = dyn_cast(E)) - return PE->getSubExpr(); - - if (auto *UO = dyn_cast(E)) { - if (UO->getOpcode() == UO_Extension) - return UO->getSubExpr(); - } - - else if (auto *GSE = dyn_cast(E)) { - if (!GSE->isResultDependent()) - return GSE->getResultExpr(); - } - - else if (auto *CE = dyn_cast(E)) { - 
if (!CE->isConditionDependent()) - return CE->getChosenSubExpr(); - } - - return E; -} - static Expr *IgnoreNoopCastsSingleStep(const ASTContext &Ctx, Expr *E) { if (auto *CE = dyn_cast(E)) { // We ignore integer <-> casts that are of the same width, ptr<->ptr and @@ -2914,27 +2803,8 @@ static Expr *IgnoreNoopCastsSingleStep(const ASTContext &Ctx, Expr *E) { return E; } -static Expr *IgnoreExprNodesImpl(Expr *E) { return E; } -template -static Expr *IgnoreExprNodesImpl(Expr *E, FnTy &&Fn, FnTys &&... Fns) { - return IgnoreExprNodesImpl(Fn(E), std::forward(Fns)...); -} - -/// Given an expression E and functions Fn_1,...,Fn_n : Expr * -> Expr *, -/// Recursively apply each of the functions to E until reaching a fixed point. -/// Note that a null E is valid; in this case nothing is done. -template -static Expr *IgnoreExprNodes(Expr *E, FnTys &&... Fns) { - Expr *LastE = nullptr; - while (E != LastE) { - LastE = E; - E = IgnoreExprNodesImpl(E, std::forward(Fns)...); - } - return E; -} - Expr *Expr::IgnoreImpCasts() { - return IgnoreExprNodes(this, IgnoreImpCastsSingleStep); + return IgnoreExprNodes(this, IgnoreImplicitCastsSingleStep); } Expr *Expr::IgnoreCasts() { @@ -2955,7 +2825,7 @@ Expr *Expr::IgnoreParens() { Expr *Expr::IgnoreParenImpCasts() { return IgnoreExprNodes(this, IgnoreParensSingleStep, - IgnoreImpCastsExtraSingleStep); + IgnoreImplicitCastsExtraSingleStep); } Expr *Expr::IgnoreParenCasts() { @@ -2993,7 +2863,7 @@ Expr *Expr::IgnoreUnlessSpelledInSource() { while (E != LastE) { LastE = E; E = IgnoreExprNodes(E, IgnoreImplicitSingleStep, - IgnoreImpCastsExtraSingleStep, + IgnoreImplicitCastsExtraSingleStep, IgnoreParensOnlySingleStep); auto SR = E->getSourceRange(); diff --git a/clang/lib/AST/IgnoreExpr.cpp b/clang/lib/AST/IgnoreExpr.cpp new file mode 100644 index 0000000000000..65aaaeb6a1ed0 --- /dev/null +++ b/clang/lib/AST/IgnoreExpr.cpp @@ -0,0 +1,129 @@ +//===--- IgnoreExpr.cpp - Ignore intermediate Expressions -----------------===// +// +// Part of 
the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements common functions to ignore intermediate expression nodes +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/IgnoreExpr.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" + +using namespace clang; + +Expr *clang::IgnoreImplicitCastsSingleStep(Expr *E) { + if (auto *ICE = dyn_cast(E)) + return ICE->getSubExpr(); + + if (auto *FE = dyn_cast(E)) + return FE->getSubExpr(); + + return E; +} + +Expr *clang::IgnoreImplicitCastsExtraSingleStep(Expr *E) { + // FIXME: Skip MaterializeTemporaryExpr and SubstNonTypeTemplateParmExpr in + // addition to what IgnoreImpCasts() skips to account for the current + // behaviour of IgnoreParenImpCasts(). + Expr *SubE = IgnoreImplicitCastsSingleStep(E); + if (SubE != E) + return SubE; + + if (auto *MTE = dyn_cast(E)) + return MTE->getSubExpr(); + + if (auto *NTTP = dyn_cast(E)) + return NTTP->getReplacement(); + + return E; +} + +Expr *clang::IgnoreCastsSingleStep(Expr *E) { + if (auto *CE = dyn_cast(E)) + return CE->getSubExpr(); + + if (auto *FE = dyn_cast(E)) + return FE->getSubExpr(); + + if (auto *MTE = dyn_cast(E)) + return MTE->getSubExpr(); + + if (auto *NTTP = dyn_cast(E)) + return NTTP->getReplacement(); + + return E; +} + +Expr *clang::IgnoreLValueCastsSingleStep(Expr *E) { + // Skip what IgnoreCastsSingleStep skips, except that only + // lvalue-to-rvalue casts are skipped. 
+ if (auto *CE = dyn_cast(E)) + if (CE->getCastKind() != CK_LValueToRValue) + return E; + + return IgnoreCastsSingleStep(E); +} + +Expr *clang::IgnoreBaseCastsSingleStep(Expr *E) { + if (auto *CE = dyn_cast(E)) + if (CE->getCastKind() == CK_DerivedToBase || + CE->getCastKind() == CK_UncheckedDerivedToBase || + CE->getCastKind() == CK_NoOp) + return CE->getSubExpr(); + + return E; +} + +Expr *clang::IgnoreImplicitSingleStep(Expr *E) { + Expr *SubE = IgnoreImplicitCastsSingleStep(E); + if (SubE != E) + return SubE; + + if (auto *MTE = dyn_cast(E)) + return MTE->getSubExpr(); + + if (auto *BTE = dyn_cast(E)) + return BTE->getSubExpr(); + + return E; +} + +Expr *clang::IgnoreImplicitAsWrittenSingleStep(Expr *E) { + if (auto *ICE = dyn_cast(E)) + return ICE->getSubExprAsWritten(); + + return IgnoreImplicitSingleStep(E); +} + +Expr *clang::IgnoreParensOnlySingleStep(Expr *E) { + if (auto *PE = dyn_cast(E)) + return PE->getSubExpr(); + return E; +} + +Expr *clang::IgnoreParensSingleStep(Expr *E) { + if (auto *PE = dyn_cast(E)) + return PE->getSubExpr(); + + if (auto *UO = dyn_cast(E)) { + if (UO->getOpcode() == UO_Extension) + return UO->getSubExpr(); + } + + else if (auto *GSE = dyn_cast(E)) { + if (!GSE->isResultDependent()) + return GSE->getResultExpr(); + } + + else if (auto *CE = dyn_cast(E)) { + if (!CE->isConditionDependent()) + return CE->getChosenSubExpr(); + } + + return E; +} From 1a7a2cd7474e6d321120ffe7ca9c52163eb228f0 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Mon, 31 Aug 2020 16:03:31 +0000 Subject: [PATCH 322/465] [Ignore Expressions][NFC] Refactor to better use `IgnoreExpr.h` and nits This change groups * Rename: `ignoreParenBaseCasts` -> `IgnoreParenBaseCasts` for uniformity * Rename: `IgnoreConversionOperator` -> `IgnoreConversionOperatorSingleStep` for uniformity * Inline `IgnoreNoopCastsSingleStep` into a lambda inside `IgnoreNoopCasts` * Refactor `IgnoreUnlessSpelledInSource` to make adequate use of `IgnoreExprNodes` Differential Revision: 
https://reviews.llvm.org/D86880 --- .../clang-tidy/modernize/UseAutoCheck.cpp | 2 +- .../readability/SimplifyBooleanExprCheck.cpp | 2 +- clang/include/clang/AST/Expr.h | 12 +-- clang/lib/AST/Expr.cpp | 97 +++++++++---------- clang/lib/CodeGen/CGExprCXX.cpp | 2 +- clang/lib/Sema/SemaExpr.cpp | 2 +- clang/lib/StaticAnalyzer/Core/CallEvent.cpp | 2 +- 7 files changed, 55 insertions(+), 64 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp index 04dc61f02df1e..44ae380b63b2e 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseAutoCheck.cpp @@ -338,7 +338,7 @@ void UseAutoCheck::replaceIterators(const DeclStmt *D, ASTContext *Context) { // Drill down to the as-written initializer. const Expr *E = (*Construct->arg_begin())->IgnoreParenImpCasts(); - if (E != E->IgnoreConversionOperator()) { + if (E != E->IgnoreConversionOperatorSingleStep()) { // We hit a conversion operator. Early-out now as they imply an implicit // conversion from a different type. Could also mean an explicit // conversion from the same type but that's pretty rare. 
diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp index 9dcb10b9d20c4..7e8ba4eb90c65 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp @@ -205,7 +205,7 @@ std::string compareExpressionToZero(const MatchFinder::MatchResult &Result, std::string replacementExpression(const MatchFinder::MatchResult &Result, bool Negated, const Expr *E) { - E = E->ignoreParenBaseCasts(); + E = E->IgnoreParenBaseCasts(); if (const auto *EC = dyn_cast(E)) E = EC->getSubExpr(); diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 5edca25937896..26e52ad367f81 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -867,9 +867,9 @@ class Expr : public ValueStmt { /// Skip conversion operators. If this Expr is a call to a conversion /// operator, return the argument. - Expr *IgnoreConversionOperator() LLVM_READONLY; - const Expr *IgnoreConversionOperator() const { - return const_cast(this)->IgnoreConversionOperator(); + Expr *IgnoreConversionOperatorSingleStep() LLVM_READONLY; + const Expr *IgnoreConversionOperatorSingleStep() const { + return const_cast(this)->IgnoreConversionOperatorSingleStep(); } /// Skip past any parentheses and lvalue casts which might surround this @@ -901,9 +901,9 @@ class Expr : public ValueStmt { /// * What IgnoreParens() skips /// * CastExpr which represent a derived-to-base cast (CK_DerivedToBase, /// CK_UncheckedDerivedToBase and CK_NoOp) - Expr *ignoreParenBaseCasts() LLVM_READONLY; - const Expr *ignoreParenBaseCasts() const { - return const_cast(this)->ignoreParenBaseCasts(); + Expr *IgnoreParenBaseCasts() LLVM_READONLY; + const Expr *IgnoreParenBaseCasts() const { + return const_cast(this)->IgnoreParenBaseCasts(); } /// Determine whether this expression is a default function argument. 
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 1029acbf68cd1..15f3df0fd2168 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -40,7 +40,7 @@ using namespace clang; const Expr *Expr::getBestDynamicClassTypeExpr() const { const Expr *E = this; while (true) { - E = E->ignoreParenBaseCasts(); + E = E->IgnoreParenBaseCasts(); // Follow the RHS of a comma operator. if (auto *BO = dyn_cast(E)) { @@ -2780,29 +2780,6 @@ QualType Expr::findBoundMemberType(const Expr *expr) { return QualType(); } -static Expr *IgnoreNoopCastsSingleStep(const ASTContext &Ctx, Expr *E) { - if (auto *CE = dyn_cast(E)) { - // We ignore integer <-> casts that are of the same width, ptr<->ptr and - // ptr<->int casts of the same width. We also ignore all identity casts. - Expr *SubExpr = CE->getSubExpr(); - bool IsIdentityCast = - Ctx.hasSameUnqualifiedType(E->getType(), SubExpr->getType()); - bool IsSameWidthCast = - (E->getType()->isPointerType() || E->getType()->isIntegralType(Ctx)) && - (SubExpr->getType()->isPointerType() || - SubExpr->getType()->isIntegralType(Ctx)) && - (Ctx.getTypeSize(E->getType()) == Ctx.getTypeSize(SubExpr->getType())); - - if (IsIdentityCast || IsSameWidthCast) - return SubExpr; - } - - else if (auto *NTTP = dyn_cast(E)) - return NTTP->getReplacement(); - - return E; -} - Expr *Expr::IgnoreImpCasts() { return IgnoreExprNodes(this, IgnoreImplicitCastsSingleStep); } @@ -2832,7 +2809,7 @@ Expr *Expr::IgnoreParenCasts() { return IgnoreExprNodes(this, IgnoreParensSingleStep, IgnoreCastsSingleStep); } -Expr *Expr::IgnoreConversionOperator() { +Expr *Expr::IgnoreConversionOperatorSingleStep() { if (auto *MCE = dyn_cast(this)) { if (MCE->getMethodDecl() && isa(MCE->getMethodDecl())) return MCE->getImplicitObjectArgument(); @@ -2845,58 +2822,72 @@ Expr *Expr::IgnoreParenLValueCasts() { IgnoreLValueCastsSingleStep); } -Expr *Expr::ignoreParenBaseCasts() { +Expr *Expr::IgnoreParenBaseCasts() { return IgnoreExprNodes(this, 
IgnoreParensSingleStep, IgnoreBaseCastsSingleStep); } Expr *Expr::IgnoreParenNoopCasts(const ASTContext &Ctx) { - return IgnoreExprNodes(this, IgnoreParensSingleStep, [&Ctx](Expr *E) { - return IgnoreNoopCastsSingleStep(Ctx, E); - }); + auto IgnoreNoopCastsSingleStep = [&Ctx](Expr *E) { + if (auto *CE = dyn_cast(E)) { + // We ignore integer <-> casts that are of the same width, ptr<->ptr and + // ptr<->int casts of the same width. We also ignore all identity casts. + Expr *SubExpr = CE->getSubExpr(); + bool IsIdentityCast = + Ctx.hasSameUnqualifiedType(E->getType(), SubExpr->getType()); + bool IsSameWidthCast = (E->getType()->isPointerType() || + E->getType()->isIntegralType(Ctx)) && + (SubExpr->getType()->isPointerType() || + SubExpr->getType()->isIntegralType(Ctx)) && + (Ctx.getTypeSize(E->getType()) == + Ctx.getTypeSize(SubExpr->getType())); + + if (IsIdentityCast || IsSameWidthCast) + return SubExpr; + } else if (auto *NTTP = dyn_cast(E)) + return NTTP->getReplacement(); + + return E; + }; + return IgnoreExprNodes(this, IgnoreParensSingleStep, + IgnoreNoopCastsSingleStep); } Expr *Expr::IgnoreUnlessSpelledInSource() { - Expr *E = this; - - Expr *LastE = nullptr; - while (E != LastE) { - LastE = E; - E = IgnoreExprNodes(E, IgnoreImplicitSingleStep, - IgnoreImplicitCastsExtraSingleStep, - IgnoreParensOnlySingleStep); - - auto SR = E->getSourceRange(); - + auto IgnoreImplicitConstructorSingleStep = [](Expr *E) { if (auto *C = dyn_cast(E)) { auto NumArgs = C->getNumArgs(); if (NumArgs == 1 || (NumArgs > 1 && isa(C->getArg(1)))) { Expr *A = C->getArg(0); - if (A->getSourceRange() == SR || !isa(C)) - E = A; + if (A->getSourceRange() == E->getSourceRange() || + !isa(C)) + return A; } } - + return E; + }; + auto IgnoreImplicitMemberCallSingleStep = [](Expr *E) { if (auto *C = dyn_cast(E)) { Expr *ExprNode = C->getImplicitObjectArgument(); - if (ExprNode->getSourceRange() == SR) { - E = ExprNode; - continue; + if (ExprNode->getSourceRange() == E->getSourceRange()) { + 
return ExprNode; } if (auto *PE = dyn_cast(ExprNode)) { if (PE->getSourceRange() == C->getSourceRange()) { - E = PE; - continue; + return cast(PE); } } ExprNode = ExprNode->IgnoreParenImpCasts(); - if (ExprNode->getSourceRange() == SR) - E = ExprNode; + if (ExprNode->getSourceRange() == E->getSourceRange()) + return ExprNode; } - } - - return E; + return E; + }; + return IgnoreExprNodes( + this, IgnoreImplicitSingleStep, IgnoreImplicitCastsExtraSingleStep, + IgnoreParensOnlySingleStep, IgnoreImplicitConstructorSingleStep, + IgnoreImplicitMemberCallSingleStep); } bool Expr::isDefaultArgument() const { diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index d0e0c7d6c0603..50b6079bd80bf 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -220,7 +220,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( DevirtualizedMethod = MD->getCorrespondingMethodInClass(BestDynamicDecl); assert(DevirtualizedMethod); const CXXRecordDecl *DevirtualizedClass = DevirtualizedMethod->getParent(); - const Expr *Inner = Base->ignoreParenBaseCasts(); + const Expr *Inner = Base->IgnoreParenBaseCasts(); if (DevirtualizedMethod->getReturnType().getCanonicalType() != MD->getReturnType().getCanonicalType()) // If the return types are not the same, this might be a case where more diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index cd71ce70c70ef..d6f0a12106fe0 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -8372,7 +8372,7 @@ static bool IsArithmeticBinaryExpr(Expr *E, BinaryOperatorKind *Opcode, Expr **RHSExprs) { // Don't strip parenthesis: we should not warn if E is in parenthesis. 
E = E->IgnoreImpCasts(); - E = E->IgnoreConversionOperator(); + E = E->IgnoreConversionOperatorSingleStep(); E = E->IgnoreImpCasts(); if (auto *MTE = dyn_cast(E)) { E = MTE->getSubExpr(); diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index 78d13ddfb773c..a55d9302ca587 100644 --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -687,7 +687,7 @@ void CXXInstanceCall::getExtraInvalidatedValues( // base class decl, rather than the class of the instance which needs to be // checked for mutable fields. // TODO: We might as well look at the dynamic type of the object. - const Expr *Ex = getCXXThisExpr()->ignoreParenBaseCasts(); + const Expr *Ex = getCXXThisExpr()->IgnoreParenBaseCasts(); QualType T = Ex->getType(); if (T->isPointerType()) // Arrow or implicit-this syntax? T = T->getPointeeType(); From 0dbe2504af81fc8ac7438d490b98370740442805 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 10:28:01 +0100 Subject: [PATCH 323/465] [X86] Use Register instead of unsigned. NFCI. Fixes llvm-prefer-register-over-unsigned clang-tidy warning. 
--- llvm/lib/Target/X86/X86AsmPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index aa03217d155d5..75b2368ce1850 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -448,7 +448,7 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, static bool printAsmVRegister(X86AsmPrinter &P, const MachineOperand &MO, char Mode, raw_ostream &O) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT; unsigned Index; From 22fa6b20d92efe796ad881aafe6e689960fe6e7d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 10:30:53 +0100 Subject: [PATCH 324/465] [X86] Use Register instead of unsigned. NFCI. Fixes llvm-prefer-register-over-unsigned clang-tidy warnings. --- llvm/lib/Target/X86/X86FrameLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index c7ca6fb2a4fcf..d7a377e0c6ba8 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -490,7 +490,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( } const MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const unsigned FramePtr = TRI->getFrameRegister(MF); + const Register FramePtr = TRI->getFrameRegister(MF); const unsigned MachineFramePtr = STI.isTarget64BitILP32() ? unsigned(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; @@ -1788,7 +1788,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. const bool Is64BitILP32 = STI.isTarget64BitILP32(); Register FramePtr = TRI->getFrameRegister(MF); - unsigned MachineFramePtr = + Register MachineFramePtr = Is64BitILP32 ? 
Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); From ee68b66d94b50d8c9ff14d3217a77c66b0e2c32f Mon Sep 17 00:00:00 2001 From: Esme-Yi Date: Mon, 7 Sep 2020 09:45:47 +0000 Subject: [PATCH 325/465] [NFC][PowerPC] Add tests for `mul` with big constants. --- llvm/test/CodeGen/PowerPC/mulli.ll | 70 ++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/mulli.ll diff --git a/llvm/test/CodeGen/PowerPC/mulli.ll b/llvm/test/CodeGen/PowerPC/mulli.ll new file mode 100644 index 0000000000000..3e417f9720a84 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/mulli.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +define i64 @test1(i64 %x) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: li 4, 625 +; CHECK-NEXT: sldi 4, 4, 36 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, 42949672960000 + ret i64 %y +} + +define i64 @test2(i64 %x) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: +; CHECK-NEXT: li 4, -625 +; CHECK-NEXT: sldi 4, 4, 36 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, -42949672960000 + ret i64 %y +} + +define i64 @test3(i64 %x) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, 74 +; CHECK-NEXT: ori 4, 4, 16384 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, 4866048 + ret i64 %y +} + +define i64 @test4(i64 %x) { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, -75 +; CHECK-NEXT: ori 4, 4, 49152 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, -4866048 + ret i64 %y +} + +define i64 @test5(i64 %x) { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, 16 +; CHECK-NEXT: ori 4, 4, 1 +; CHECK-NEXT: sldi 4, 4, 12 +; CHECK-NEXT: mulld 3, 3, 4 +; 
CHECK-NEXT: blr + %y = mul i64 %x, 4294971392 + ret i64 %y +} + +define i64 @test6(i64 %x) { +; CHECK-LABEL: test6: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, -17 +; CHECK-NEXT: ori 4, 4, 65535 +; CHECK-NEXT: sldi 4, 4, 12 +; CHECK-NEXT: mulld 3, 3, 4 +; CHECK-NEXT: blr + %y = mul i64 %x, -4294971392 + ret i64 %y +} From 9ad261540da6e66a666e48fed95455bc27fa995b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 10:49:29 +0100 Subject: [PATCH 326/465] [X86] Use Register instead of unsigned. NFCI. Fixes llvm-prefer-register-over-unsigned clang-tidy warnings. --- llvm/lib/Target/X86/X86InstrInfo.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 3c24f51ba36b1..5aac29e21d6f9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6712,7 +6712,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { "X86-64 PIC uses RIP relative addressing"); X86MachineFunctionInfo *X86FI = MF->getInfo(); - unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); + Register GlobalBaseReg = X86FI->getGlobalBaseReg(); if (GlobalBaseReg != 0) return GlobalBaseReg; @@ -8268,7 +8268,7 @@ describeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg, // If the described register is a sub-register of the destination register, // then pick out the source register's corresponding sub-register. if (unsigned SubRegIdx = TRI->getSubRegIndex(DestReg, DescribedReg)) { - unsigned SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx); + Register SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx); return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr); } @@ -8532,7 +8532,7 @@ namespace { return false; X86MachineFunctionInfo *X86FI = MF.getInfo(); - unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); + Register GlobalBaseReg = X86FI->getGlobalBaseReg(); // If we didn't need a GlobalBaseReg, don't insert code. 
if (GlobalBaseReg == 0) @@ -8545,7 +8545,7 @@ namespace { MachineRegisterInfo &RegInfo = MF.getRegInfo(); const X86InstrInfo *TII = STI.getInstrInfo(); - unsigned PC; + Register PC; if (STI.isPICStyleGOT()) PC = RegInfo.createVirtualRegister(&X86::GR32RegClass); else @@ -8615,7 +8615,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } }; -} +} // namespace char CGBR::ID = 0; FunctionPass* From aa3fcb967110f2d448d241358cadc048954e6134 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 10:58:53 +0100 Subject: [PATCH 327/465] [X86][AVX] Add extra vperm2f128+vpermilvar combine coverage The existing test /should/ reduce to a vmovaps (concat xmm with zero upper). --- .../CodeGen/X86/vector-shuffle-combining-avx.ll | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 6ffbe095c39ba..d4ef76a2a9cff 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -142,8 +142,19 @@ define <8 x float> @combine_vpermilvar_vperm2f128_zero_8f32(<8 x float> %a0) { ret <8 x float> %3 } -define <4 x double> @combine_vperm2f128_vpermilvar_as_vpblendpd(<4 x double> %a0) { -; CHECK-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd: +define <4 x double> @combine_vperm2f128_vpermilvar_as_vperm2f128(<4 x double> %a0) { +; CHECK-LABEL: combine_vperm2f128_vpermilvar_as_vperm2f128: +; CHECK: # %bb.0: +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1] +; CHECK-NEXT: ret{{[l|q]}} + %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> ) + %2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32> + %3 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %2, <4 x i64> ) + ret <4 x double> %3 +} + +define <4 x double> @combine_vperm2f128_vpermilvar_as_vmovaps(<4 x double> %a0) { +; 
CHECK-LABEL: combine_vperm2f128_vpermilvar_as_vmovaps: ; CHECK: # %bb.0: ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] From 71dfdbe2c73afcc319bfd96c9e73407ea9245e3a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 11:10:40 +0100 Subject: [PATCH 328/465] [X86] getFauxShuffleMask - handle insert_subvector(zero, sub, C) Directly use SM_SentinelZero elements if we're (widening)inserting into a zero vector. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++++-- llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll | 3 +-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 99d35f0c91ffa..09855fd0eb925 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7452,8 +7452,11 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, } Ops.push_back(Src); Ops.append(SubInputs.begin(), SubInputs.end()); - for (int i = 0; i != (int)NumElts; ++i) - Mask.push_back(i); + if (ISD::isBuildVectorAllZeros(Src.getNode())) + Mask.append(NumElts, SM_SentinelZero); + else + for (int i = 0; i != (int)NumElts; ++i) + Mask.push_back(i); for (int i = 0; i != (int)NumSubElts; ++i) { int M = SubMask[i]; if (0 <= M) { diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index d4ef76a2a9cff..e744dbd103362 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -156,8 +156,7 @@ define <4 x double> @combine_vperm2f128_vpermilvar_as_vperm2f128(<4 x double> %a define <4 x double> @combine_vperm2f128_vpermilvar_as_vmovaps(<4 x double> %a0) { ; CHECK-LABEL: combine_vperm2f128_vpermilvar_as_vmovaps: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] +; 
CHECK-NEXT: vmovaps %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> ) %2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32> From 7ba0f81934ca5f4baa1d81ac0032f2e4ff6614ec Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 7 Sep 2020 12:24:30 +0200 Subject: [PATCH 329/465] [X86] Unbreak the build after 22fa6b20d92e --- llvm/lib/Target/X86/X86FrameLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index d7a377e0c6ba8..7437c2e978af2 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -491,8 +491,8 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( const MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const Register FramePtr = TRI->getFrameRegister(MF); - const unsigned MachineFramePtr = - STI.isTarget64BitILP32() ? unsigned(getX86SubSuperRegister(FramePtr, 64)) + const Register MachineFramePtr = + STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true); // Offset = space for return address + size of the frame pointer itself. 
From 56d1f3138b532f4e195a5aaba9ea65a8bcb8adb4 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 7 Sep 2020 10:25:26 +0000 Subject: [PATCH 330/465] [gn build] Port 81aa66f65f5 --- llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn index 4d645799dbf65..bb3d69d046bef 100644 --- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn @@ -81,6 +81,7 @@ static_library("AST") { "ExternalASTMerger.cpp", "ExternalASTSource.cpp", "FormatString.cpp", + "IgnoreExpr.cpp", "InheritViz.cpp", "Interp/ByteCodeEmitter.cpp", "Interp/ByteCodeExprGen.cpp", From 0478720157f6413fad7595b8eff9c70d2d99b637 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 7 Sep 2020 11:23:39 +0200 Subject: [PATCH 331/465] [clang] Prevent that Decl::dump on a CXXRecordDecl deserialises further declarations. Decl::dump is primarily used for debugging to visualise the current state of a declaration. Usually Decl::dump just displays the current state of the Decl and doesn't actually change any of its state, however since commit 457226e02a6e8533eaaa864a3fd7c8eeccd2bf58 the method actually started loading additional declarations from the ExternalASTSource. This causes that calling Decl::dump during a debugging session now actually does permanent changes to the AST and will cause the debugged program run to deviate from the original run. The change that caused this behaviour is the addition of `hasConstexprDestructor` (which is called from the TextNodeDumper) which performs a lookup into the current CXXRecordDecl to find the destructor. All other similar methods just return their respective bit in the DefinitionData (which obviously doesn't have such side effects). 
This just changes the node printer to emit "unknown_constexpr" in case a CXXRecordDecl is dumped that could potentially call into the ExternalASTSource instead of the usually empty string/"constexpr". For CXXRecordDecls that can safely be dumped the old behaviour is preserved Reviewed By: bruno Differential Revision: https://reviews.llvm.org/D80878 --- clang/lib/AST/TextNodeDumper.cpp | 6 +- clang/test/AST/ast-dump-lambda.cpp | 32 +++---- clang/test/AST/ast-dump-records.cpp | 22 ++--- clang/unittests/AST/ASTDumpTest.cpp | 140 ++++++++++++++++++++++++++++ clang/unittests/AST/CMakeLists.txt | 1 + 5 files changed, 173 insertions(+), 28 deletions(-) create mode 100644 clang/unittests/AST/ASTDumpTest.cpp diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 16c4c3736a4a3..19b7b4c801d55 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1960,7 +1960,11 @@ void TextNodeDumper::VisitCXXRecordDecl(const CXXRecordDecl *D) { FLAG(hasTrivialDestructor, trivial); FLAG(hasNonTrivialDestructor, non_trivial); FLAG(hasUserDeclaredDestructor, user_declared); - FLAG(hasConstexprDestructor, constexpr); + // Avoid calls to the external source. + if (!D->hasExternalVisibleStorage()) { + FLAG(hasConstexprDestructor, constexpr); + } else + OS << " maybe_constexpr"; FLAG(needsImplicitDestructor, needs_implicit); FLAG(needsOverloadResolutionForDestructor, needs_overload_resolution); if (!D->needsOverloadResolutionForDestructor()) diff --git a/clang/test/AST/ast-dump-lambda.cpp b/clang/test/AST/ast-dump-lambda.cpp index 37fb62ef9930e..302b93734459b 100644 --- a/clang/test/AST/ast-dump-lambda.cpp +++ b/clang/test/AST/ast-dump-lambda.cpp @@ -48,7 +48,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | |-CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | |-MoveAssignment exists simple trivial needs_implicit -// CHECK-NEXT: | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | |-CXXRecordDecl {{.*}} col:10{{( imported)?}} implicit struct V // CHECK-NEXT: | `-CXXMethodDecl {{.*}} line:17:10{{( imported)?}} f 'void ()' // CHECK-NEXT: | `-CompoundStmt {{.*}} @@ -60,7 +60,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | | |-MoveAssignment -// CHECK-NEXT: | | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | | |-CXXMethodDecl {{.*}} col:7{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | | `-FieldDecl {{.*}} col:8{{( imported)?}} implicit 'V *' @@ -75,7 +75,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:7{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | `-FieldDecl {{.*}} col:8{{( imported)?}} implicit 'V' @@ -94,7 +94,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-CXXConversionDecl {{.*}} col:3{{( imported)?}} implicit constexpr operator auto (*)() 'auto (*() const noexcept)()' inline @@ -108,7 +108,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto (int, ...) 
const' inline // CHECK-NEXT: | | | |-ParmVarDecl {{.*}} col:10{{( imported)?}} a 'int' // CHECK-NEXT: | | | `-CompoundStmt {{.*}} @@ -124,7 +124,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | `-FieldDecl {{.*}} col:4{{( imported)?}} implicit 'Ts...' @@ -139,7 +139,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | `-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | `-CompoundStmt {{.*}} @@ -151,7 +151,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | `-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | `-ReturnStmt {{.*}} @@ -167,7 +167,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | `-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | `-CompoundStmt {{.*}} @@ -179,7 +179,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | `-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | `-ReturnStmt {{.*}} @@ -195,7 +195,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | | `-ReturnStmt {{.*}} @@ -224,7 +224,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-FieldDecl {{.*}} col:4{{( imported)?}} implicit 'Ts...' @@ -241,7 +241,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} constexpr operator() 'auto () const' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-CXXConversionDecl {{.*}} col:3{{( imported)?}} implicit constexpr operator auto (*)() 'auto (*() const noexcept)()' inline @@ -255,7 +255,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto ()' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-CXXConversionDecl {{.*}} col:3{{( imported)?}} implicit constexpr operator auto (*)() 'auto (*() const noexcept)()' inline @@ -269,7 +269,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const noexcept' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-CXXConversionDecl {{.*}} col:3{{( imported)?}} implicit constexpr operator auto (*)() noexcept 'auto (*() const noexcept)() noexcept' inline @@ -283,7 +283,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | |-MoveAssignment -// CHECK-NEXT: | | `-Destructor simple irrelevant trivial needs_implicit +// CHECK-NEXT: | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> int' inline // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | `-ReturnStmt {{.*}} diff --git a/clang/test/AST/ast-dump-records.cpp b/clang/test/AST/ast-dump-records.cpp index cb7ac83204312..cdaa2ef16eba8 100644 --- a/clang/test/AST/ast-dump-records.cpp +++ b/clang/test/AST/ast-dump-records.cpp @@ -22,7 +22,7 @@ struct A { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: CXXRecordDecl 0x{{[^ 
]*}} col:8 implicit struct A int a; @@ -57,7 +57,7 @@ struct C { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: CXXRecordDecl 0x{{[^ ]*}} col:8 implicit struct C struct { @@ -68,7 +68,7 @@ struct C { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit int a; // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 a 'int' } b; @@ -82,7 +82,7 @@ struct C { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit int c; // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 c 'int' float d; @@ -104,7 +104,7 @@ struct C { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit int e, f; // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 
e 'int' // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:12 f 'int' @@ -126,7 +126,7 @@ struct D { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: CXXRecordDecl 0x{{[^ ]*}} col:8 implicit struct D int a; @@ -151,7 +151,7 @@ union E { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: CXXRecordDecl 0x{{[^ ]*}} col:7 implicit union E int a; @@ -186,7 +186,7 @@ union G { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit // CHECK-NEXT: CXXRecordDecl 0x{{[^ ]*}} col:7 implicit union G struct { @@ -197,7 +197,7 @@ union G { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit int a; // 
CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 a 'int' @@ -214,7 +214,7 @@ union G { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit int c; // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 c 'int' @@ -237,7 +237,7 @@ union G { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit int e, f; // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 e 'int' diff --git a/clang/unittests/AST/ASTDumpTest.cpp b/clang/unittests/AST/ASTDumpTest.cpp new file mode 100644 index 0000000000000..45884dfd11d05 --- /dev/null +++ b/clang/unittests/AST/ASTDumpTest.cpp @@ -0,0 +1,140 @@ +//===- unittests/AST/ASTDumpTest.cpp --- Declaration tests ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Tests Decl::dump(). 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "gtest/gtest.h" + +using namespace clang; + +namespace clang { +namespace ast { + +namespace { +/// An ExternalASTSource that asserts if it is queried for information about +/// any declaration. +class TrappingExternalASTSource : public ExternalASTSource { + ~TrappingExternalASTSource() override = default; + bool FindExternalVisibleDeclsByName(const DeclContext *, + DeclarationName) override { + assert(false && "Unexpected call to FindExternalVisibleDeclsByName"); + return true; + } + + void FindExternalLexicalDecls(const DeclContext *, + llvm::function_ref, + SmallVectorImpl &) override { + assert(false && "Unexpected call to FindExternalLexicalDecls"); + } + + void completeVisibleDeclsMap(const DeclContext *) override { + assert(false && "Unexpected call to completeVisibleDeclsMap"); + } + + void CompleteRedeclChain(const Decl *) override { + assert(false && "Unexpected call to CompleteRedeclChain"); + } + + void CompleteType(TagDecl *) override { + assert(false && "Unexpected call to CompleteType(Tag Decl*)"); + } + + void CompleteType(ObjCInterfaceDecl *) override { + assert(false && "Unexpected call to CompleteType(ObjCInterfaceDecl *)"); + } +}; + +/// Tests that Decl::dump doesn't load additional declarations from the +/// ExternalASTSource. 
+class ExternalASTSourceDumpTest : public ::testing::Test { +protected: + ExternalASTSourceDumpTest() + : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()), + Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), + SourceMgr(Diags, FileMgr), Idents(LangOpts, nullptr), + Ctxt(LangOpts, SourceMgr, Idents, Sels, Builtins) { + Ctxt.setExternalSource(new TrappingExternalASTSource()); + } + + FileSystemOptions FileMgrOpts; + FileManager FileMgr; + IntrusiveRefCntPtr DiagID; + DiagnosticsEngine Diags; + SourceManager SourceMgr; + LangOptions LangOpts; + IdentifierTable Idents; + SelectorTable Sels; + Builtin::Context Builtins; + ASTContext Ctxt; +}; +} // unnamed namespace + +/// Set all flags that activate queries to the ExternalASTSource. +static void setExternalStorageFlags(DeclContext *DC) { + DC->setHasExternalLexicalStorage(); + DC->setHasExternalVisibleStorage(); + DC->setMustBuildLookupTable(); +} + +/// Dumps the given Decl. +static void dumpDecl(Decl *D) { + // Try dumping the decl which shouldn't trigger any calls to the + // ExternalASTSource. + + std::string Out; + llvm::raw_string_ostream OS(Out); + D->dump(OS); +} + +TEST_F(ExternalASTSourceDumpTest, DumpObjCInterfaceDecl) { + // Define an Objective-C interface. + ObjCInterfaceDecl *I = ObjCInterfaceDecl::Create( + Ctxt, Ctxt.getTranslationUnitDecl(), SourceLocation(), + &Ctxt.Idents.get("c"), nullptr, nullptr); + Ctxt.getTranslationUnitDecl()->addDecl(I); + + setExternalStorageFlags(I); + dumpDecl(I); +} + +TEST_F(ExternalASTSourceDumpTest, DumpRecordDecl) { + // Define a struct. + RecordDecl *R = RecordDecl::Create( + Ctxt, TagDecl::TagKind::TTK_Class, Ctxt.getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), &Ctxt.Idents.get("c")); + R->startDefinition(); + R->completeDefinition(); + Ctxt.getTranslationUnitDecl()->addDecl(R); + + setExternalStorageFlags(R); + dumpDecl(R); +} + +TEST_F(ExternalASTSourceDumpTest, DumpCXXRecordDecl) { + // Define a class. 
+ CXXRecordDecl *R = CXXRecordDecl::Create( + Ctxt, TagDecl::TagKind::TTK_Class, Ctxt.getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), &Ctxt.Idents.get("c")); + R->startDefinition(); + R->completeDefinition(); + Ctxt.getTranslationUnitDecl()->addDecl(R); + + setExternalStorageFlags(R); + dumpDecl(R); +} + +} // end namespace ast +} // end namespace clang diff --git a/clang/unittests/AST/CMakeLists.txt b/clang/unittests/AST/CMakeLists.txt index 2d5d0172afedc..9e0a33fd762fd 100644 --- a/clang/unittests/AST/CMakeLists.txt +++ b/clang/unittests/AST/CMakeLists.txt @@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS add_clang_unittest(ASTTests ASTContextParentMapTest.cpp + ASTDumpTest.cpp ASTImporterFixtures.cpp ASTImporterTest.cpp ASTImporterGenericRedeclTest.cpp From 9764eb9212c598f165e9d7dfeb273b74f7777a41 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 7 Sep 2020 10:32:22 +0000 Subject: [PATCH 332/465] [gn build] Port 0478720157f --- llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn index f25ead00165c0..fd24f89aa187f 100644 --- a/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn @@ -15,6 +15,7 @@ unittest("ASTTests") { ] sources = [ "ASTContextParentMapTest.cpp", + "ASTDumpTest.cpp", "ASTImporterFixtures.cpp", "ASTImporterGenericRedeclTest.cpp", "ASTImporterODRStrategiesTest.cpp", From 928c4b4b4988b4d633a96afa4c7f4584bc0009e5 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 7 Sep 2020 11:54:05 +0100 Subject: [PATCH 333/465] [SCEV] Refactor isHighCostExpansionHelper To enable the cost of constants, the helper function has been reorganised: - A struct has been introduced to hold SCEV operand information so that we know the user of the operand, as well as the operand index. The Worklist now uses SCEVOperand instead of a bare SCEV.
- The costing of each SCEV, and collection of its operands, is now performed in a helper function. Differential Revision: https://reviews.llvm.org/D86050 --- .../Utils/ScalarEvolutionExpander.h | 33 +- .../Utils/ScalarEvolutionExpander.cpp | 289 +++++++++--------- 2 files changed, 167 insertions(+), 155 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 78ae38288c0c3..77360cb2671d8 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -39,6 +39,19 @@ bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE); bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, ScalarEvolution &SE); +/// struct for holding enough information to help calculate the cost of the +/// given SCEV when expanded into IR. +struct SCEVOperand { + explicit SCEVOperand(unsigned Opc, int Idx, const SCEV *S) : + ParentOpcode(Opc), OperandIdx(Idx), S(S) { } + /// LLVM instruction opcode that uses the operand. + unsigned ParentOpcode; + /// The use index of an expanded instruction. + int OperandIdx; + /// The SCEV operand to be costed. + const SCEV* S; +}; + /// This class uses information about analyze scalars to rewrite expressions /// in canonical form. /// @@ -220,14 +233,14 @@ class SCEVExpander : public SCEVVisitor { assert(At && "This function requires At instruction to be provided."); if (!TTI) // In assert-less builds, avoid crashing return true; // by always claiming to be high-cost. 
- SmallVector Worklist; + SmallVector Worklist; SmallPtrSet Processed; int BudgetRemaining = Budget * TargetTransformInfo::TCC_Basic; - Worklist.emplace_back(Expr); + Worklist.emplace_back(-1, -1, Expr); while (!Worklist.empty()) { - const SCEV *S = Worklist.pop_back_val(); - if (isHighCostExpansionHelper(S, L, *At, BudgetRemaining, *TTI, Processed, - Worklist)) + const SCEVOperand WorkItem = Worklist.pop_back_val(); + if (isHighCostExpansionHelper(WorkItem, L, *At, BudgetRemaining, + *TTI, Processed, Worklist)) return true; } assert(BudgetRemaining >= 0 && "Should have returned from inner loop."); @@ -394,11 +407,11 @@ class SCEVExpander : public SCEVVisitor { Value *expandCodeForImpl(const SCEV *SH, Type *Ty, Instruction *I, bool Root); /// Recursive helper function for isHighCostExpansion. - bool isHighCostExpansionHelper(const SCEV *S, Loop *L, const Instruction &At, - int &BudgetRemaining, - const TargetTransformInfo &TTI, - SmallPtrSetImpl &Processed, - SmallVectorImpl &Worklist); + bool isHighCostExpansionHelper( + const SCEVOperand &WorkItem, Loop *L, const Instruction &At, + int &BudgetRemaining, const TargetTransformInfo &TTI, + SmallPtrSetImpl &Processed, + SmallVectorImpl &Worklist); /// Insert the specified binary operator, doing a small amount of work to /// avoid inserting an obviously redundant operation, and hoisting to an diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 1e8b11d6ac5fe..1bb827cd3057b 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2177,13 +2177,133 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, return None; } +template static int costAndCollectOperands( + const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, + TargetTransformInfo::TargetCostKind CostKind, + SmallVectorImpl &Worklist) { + + const T *S = cast(WorkItem.S); + int Cost 
= 0; + // Collect the opcodes of all the instructions that will be needed to expand + // the SCEVExpr. This is so that when we come to cost the operands, we know + // what the generated user(s) will be. + SmallVector Opcodes; + + auto CastCost = [&](unsigned Opcode) { + Opcodes.push_back(Opcode); + return TTI.getCastInstrCost(Opcode, S->getType(), + S->getOperand(0)->getType(), + TTI::CastContextHint::None, CostKind); + }; + + auto ArithCost = [&](unsigned Opcode, unsigned NumRequired) { + Opcodes.push_back(Opcode); + return NumRequired * + TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind); + }; + + auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired) { + Opcodes.push_back(Opcode); + Type *OpType = S->getOperand(0)->getType(); + return NumRequired * + TTI.getCmpSelInstrCost(Opcode, OpType, + CmpInst::makeCmpResultType(OpType), CostKind); + }; + + switch (S->getSCEVType()) { + default: + llvm_unreachable("No other scev expressions possible."); + case scUnknown: + case scConstant: + return 0; + case scTruncate: + Cost = CastCost(Instruction::Trunc); + break; + case scZeroExtend: + Cost = CastCost(Instruction::ZExt); + break; + case scSignExtend: + Cost = CastCost(Instruction::SExt); + break; + case scUDivExpr: { + unsigned Opcode = Instruction::UDiv; + if (auto *SC = dyn_cast(S->getOperand(1))) + if (SC->getAPInt().isPowerOf2()) + Opcode = Instruction::LShr; + Cost = ArithCost(Opcode, 1); + break; + } + case scAddExpr: + Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1); + break; + case scMulExpr: + // TODO: this is a very pessimistic cost modelling for Mul, + // because of Bin Pow algorithm actually used by the expander, + // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). 
+ Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1); + break; + case scSMaxExpr: + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: { + Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1); + Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1); + break; + } + case scAddRecExpr: { + // In this polynominal, we may have some zero operands, and we shouldn't + // really charge for those. So how many non-zero coeffients are there? + int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) { + return !Op->isZero(); + }); + + assert(NumTerms >= 1 && "Polynominal should have at least one term."); + assert(!(*std::prev(S->operands().end()))->isZero() && + "Last operand should not be zero"); + + // Ignoring constant term (operand 0), how many of the coeffients are u> 1? + int NumNonZeroDegreeNonOneTerms = + llvm::count_if(S->operands(), [](const SCEV *Op) { + auto *SConst = dyn_cast(Op); + return !SConst || SConst->getAPInt().ugt(1); + }); + + // Much like with normal add expr, the polynominal will require + // one less addition than the number of it's terms. + int AddCost = ArithCost(Instruction::Add, NumTerms - 1); + // Here, *each* one of those will require a multiplication. + int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); + Cost = AddCost + MulCost; + + // What is the degree of this polynominal? + int PolyDegree = S->getNumOperands() - 1; + assert(PolyDegree >= 1 && "Should be at least affine."); + + // The final term will be: + // Op_{PolyDegree} * x ^ {PolyDegree} + // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. + // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for + // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. + // FIXME: this is conservatively correct, but might be overly pessimistic. 
+ Cost += MulCost * (PolyDegree - 1); + } + } + + for (unsigned Opc : Opcodes) + for (auto I : enumerate(S->operands())) + Worklist.emplace_back(Opc, I.index(), I.value()); + return Cost; +} + bool SCEVExpander::isHighCostExpansionHelper( - const SCEV *S, Loop *L, const Instruction &At, int &BudgetRemaining, - const TargetTransformInfo &TTI, SmallPtrSetImpl &Processed, - SmallVectorImpl &Worklist) { + const SCEVOperand &WorkItem, Loop *L, const Instruction &At, + int &BudgetRemaining, const TargetTransformInfo &TTI, + SmallPtrSetImpl &Processed, + SmallVectorImpl &Worklist) { if (BudgetRemaining < 0) return true; // Already run out of budget, give up. + const SCEV *S = WorkItem.S; // Was the cost of expansion of this expression already accounted for? if (!Processed.insert(S).second) return false; // We have already accounted for this expression. @@ -2202,44 +2322,12 @@ bool SCEVExpander::isHighCostExpansionHelper( TargetTransformInfo::TargetCostKind CostKind = TargetTransformInfo::TCK_RecipThroughput; - if (auto *CastExpr = dyn_cast(S)) { - unsigned Opcode; - switch (S->getSCEVType()) { - case scTruncate: - Opcode = Instruction::Trunc; - break; - case scZeroExtend: - Opcode = Instruction::ZExt; - break; - case scSignExtend: - Opcode = Instruction::SExt; - break; - default: - llvm_unreachable("There are no other cast types."); - } - const SCEV *Op = CastExpr->getOperand(); - BudgetRemaining -= TTI.getCastInstrCost( - Opcode, /*Dst=*/S->getType(), - /*Src=*/Op->getType(), TTI::CastContextHint::None, CostKind); - Worklist.emplace_back(Op); + if (isa(S)) { + int Cost = + costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); + BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. - } - - if (auto *UDivExpr = dyn_cast(S)) { - // If the divisor is a power of two count this as a logical right-shift. 
- if (auto *SC = dyn_cast(UDivExpr->getRHS())) { - if (SC->getAPInt().isPowerOf2()) { - BudgetRemaining -= - TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(), - CostKind); - // Note that we don't count the cost of RHS, because it is a constant, - // and we consider those to be free. But if that changes, we would need - // to log2() it first before calling isHighCostExpansionHelper(). - Worklist.emplace_back(UDivExpr->getLHS()); - return false; // Will answer upon next entry into this function. - } - } - + } else if (isa(S)) { // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or // HowManyLessThans produced to compute a precise expression, rather than a // UDiv from the user's code. If we can't find a UDiv in the code with some @@ -2252,117 +2340,28 @@ bool SCEVExpander::isHighCostExpansionHelper( SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L)) return false; // Consider it to be free. + int Cost = + costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); // Need to count the cost of this UDiv. - BudgetRemaining -= - TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(), - CostKind); - Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()}); + BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. - } - - if (const auto *NAry = dyn_cast(S)) { - Type *OpType = NAry->getType(); - - assert(NAry->getNumOperands() >= 2 && - "Polynomial should be at least linear"); - - int AddCost = - TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind); - int MulCost = - TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind); - - // In this polynominal, we may have some zero operands, and we shouldn't - // really charge for those. So how many non-zero coeffients are there? 
- int NumTerms = llvm::count_if(NAry->operands(), - [](const SCEV *S) { return !S->isZero(); }); - assert(NumTerms >= 1 && "Polynominal should have at least one term."); - assert(!(*std::prev(NAry->operands().end()))->isZero() && - "Last operand should not be zero"); - - // Much like with normal add expr, the polynominal will require - // one less addition than the number of it's terms. - BudgetRemaining -= AddCost * (NumTerms - 1); - if (BudgetRemaining < 0) - return true; - - // Ignoring constant term (operand 0), how many of the coeffients are u> 1? - int NumNonZeroDegreeNonOneTerms = - llvm::count_if(make_range(std::next(NAry->op_begin()), NAry->op_end()), - [](const SCEV *S) { - auto *SConst = dyn_cast(S); - return !SConst || SConst->getAPInt().ugt(1); - }); - // Here, *each* one of those will require a multiplication. - BudgetRemaining -= MulCost * NumNonZeroDegreeNonOneTerms; - if (BudgetRemaining < 0) - return true; - - // What is the degree of this polynominal? - int PolyDegree = NAry->getNumOperands() - 1; - assert(PolyDegree >= 1 && "Should be at least affine."); - - // The final term will be: - // Op_{PolyDegree} * x ^ {PolyDegree} - // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. - // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for - // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. - // FIXME: this is conservatively correct, but might be overly pessimistic. - BudgetRemaining -= MulCost * (PolyDegree - 1); - if (BudgetRemaining < 0) - return true; - - // And finally, the operands themselves should fit within the budget. - Worklist.insert(Worklist.end(), NAry->operands().begin(), - NAry->operands().end()); - return false; // So far so good, though ops may be too costly? 
- } - - if (const SCEVNAryExpr *NAry = dyn_cast(S)) { - Type *OpType = NAry->getType(); - - int PairCost; - switch (S->getSCEVType()) { - case scAddExpr: - PairCost = - TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind); - break; - case scMulExpr: - // TODO: this is a very pessimistic cost modelling for Mul, - // because of Bin Pow algorithm actually used by the expander, - // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). - PairCost = - TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind); - break; - case scSMaxExpr: - case scUMaxExpr: - case scSMinExpr: - case scUMinExpr: - PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType, - CmpInst::makeCmpResultType(OpType), - CostKind) + - TTI.getCmpSelInstrCost(Instruction::Select, OpType, - CmpInst::makeCmpResultType(OpType), - CostKind); - break; - default: - llvm_unreachable("There are no other variants here."); - } - + } else if (const SCEVNAryExpr *NAry = dyn_cast(S)) { assert(NAry->getNumOperands() > 1 && "Nary expr should have more than 1 operand."); // The simple nary expr will require one less op (or pair of ops) // than the number of it's terms. - BudgetRemaining -= PairCost * (NAry->getNumOperands() - 1); - if (BudgetRemaining < 0) - return true; - - // And finally, the operands themselves should fit within the budget. - Worklist.insert(Worklist.end(), NAry->operands().begin(), - NAry->operands().end()); - return false; // So far so good, though ops may be too costly? 
- } - - llvm_unreachable("No other scev expressions possible."); + int Cost = + costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); + BudgetRemaining -= Cost; + return BudgetRemaining < 0; + } else if (const auto *NAry = dyn_cast(S)) { + assert(NAry->getNumOperands() >= 2 && + "Polynomial should be at least linear"); + BudgetRemaining -= costAndCollectOperands( + WorkItem, TTI, CostKind, Worklist); + return BudgetRemaining < 0; + } else + llvm_unreachable("No other scev expressions possible."); } Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, From 136eb79a8846c4e8ff6ba5ccfc0c470ab351fb13 Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Mon, 7 Sep 2020 11:41:27 +0000 Subject: [PATCH 334/465] [MLIR][Standard] Add `dynamic_tensor_from_elements` operation With `dynamic_tensor_from_elements` tensor values of dynamic size can be created. The body of the operation essentially maps the index space to tensor elements. Declare SCF operations in the `scf` namespace to avoid name clash with the new `std.yield` operation. Resolve ambiguities between `linalg/shape/std/scf.yield` operations. 
Differential Revision: https://reviews.llvm.org/D86276 --- mlir/include/mlir/Dialect/SCF/SCFOps.td | 8 +-- .../mlir/Dialect/StandardOps/IR/Ops.td | 49 ++++++++++++++ .../Conversion/LinalgToLLVM/LinalgToLLVM.cpp | 3 +- .../SCFToStandard/SCFToStandard.cpp | 4 +- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 7 +- mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 2 +- mlir/lib/Dialect/Linalg/Transforms/Loops.cpp | 2 +- .../Linalg/Transforms/Vectorization.cpp | 16 ++--- mlir/lib/Dialect/SCF/SCF.cpp | 8 ++- mlir/lib/Dialect/Shape/IR/Shape.cpp | 2 +- .../Shape/Transforms/ShapeToShapeLowering.cpp | 2 +- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 62 ++++++++++++++++- mlir/test/Dialect/Standard/invalid.mlir | 66 +++++++++++++++++++ mlir/test/Dialect/Standard/ops.mlir | 14 +++- 14 files changed, 219 insertions(+), 26 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td index 78aefec00bf76..59ba50fbe2322 100644 --- a/mlir/include/mlir/Dialect/SCF/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td @@ -19,7 +19,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" def SCF_Dialect : Dialect { let name = "scf"; - let cppNamespace = ""; + let cppNamespace = "scf"; } // Base class for SCF dialect ops. 
@@ -39,7 +39,7 @@ class SCF_Op traits = []> : def ForOp : SCF_Op<"for", [DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, - SingleBlockImplicitTerminator<"YieldOp">, + SingleBlockImplicitTerminator<"scf::YieldOp">, RecursiveSideEffects]> { let summary = "for operation"; let description = [{ @@ -183,7 +183,7 @@ def ForOp : SCF_Op<"for", def IfOp : SCF_Op<"if", [DeclareOpInterfaceMethods, - SingleBlockImplicitTerminator<"YieldOp">, RecursiveSideEffects, + SingleBlockImplicitTerminator<"scf::YieldOp">, RecursiveSideEffects, NoRegionArguments]> { let summary = "if-then-else operation"; let description = [{ @@ -271,7 +271,7 @@ def ParallelOp : SCF_Op<"parallel", [AttrSizedOperandSegments, DeclareOpInterfaceMethods, RecursiveSideEffects, - SingleBlockImplicitTerminator<"YieldOp">]> { + SingleBlockImplicitTerminator<"scf::YieldOp">]> { let summary = "parallel for operation"; let description = [{ The "scf.parallel" operation represents a loop nest taking 4 groups of SSA diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index ae951e824e001..f326ae5578650 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1475,6 +1475,37 @@ def DivFOp : FloatArithmeticOp<"divf"> { let summary = "floating point division operation"; } +//===----------------------------------------------------------------------===// +// DynamicTensorFromElementsOp +//===----------------------------------------------------------------------===// + +def DynamicTensorFromElementsOp : Std_Op<"dynamic_tensor_from_elements", + [RecursiveSideEffects, SingleBlockImplicitTerminator<"YieldOp">]> { + string summary = "Creates a dynamically sized tensor from elements"; + string description = [{ + This operation creates a dynamically sized tensor with elements of any type. + It expects one index operand per dynamic extent of the result tensor. 
+ + The body region defines the tensor's elements. It takes index operands as + its region arguments that span the index space. The element at the given + position is yielded with the `yield` operation (see `YieldOp`). + + Example: + + ```mlir + %tnsr = dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index, %k : index): + ... + yield %elem : f32 + } : tensor + ``` + }]; + + let arguments = (ins Variadic:$dynamicExtents); + let results = (outs AnyRankedTensor:$result); + let regions = (region SizedRegion<1>:$body); +} + //===----------------------------------------------------------------------===// // ExpOp //===----------------------------------------------------------------------===// @@ -3252,6 +3283,24 @@ def ViewOp : Std_Op<"view", [ let hasCanonicalizer = 1; } +//===----------------------------------------------------------------------===// +// YieldOp +//===----------------------------------------------------------------------===// + +def YieldOp : Std_Op<"yield", [NoSideEffect, ReturnLike, Terminator, + HasParent<"DynamicTensorFromElementsOp">]> { + let summary = "Yield a value from a region"; + let description = [{ + This operation is used to yield a single value from a within a region. It + is used to create dynamically sized tensors + (see `DynamicTensorFromElementsOp`). 
+ }]; + + let arguments = (ins AnyType:$value); + let assemblyFormat = "$value attr-dict `:` type($value)"; + let verifier = ?; +} + //===----------------------------------------------------------------------===// // XOrOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp index 0460d98b44a47..f38eabb9465d5 100644 --- a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp +++ b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp @@ -339,7 +339,8 @@ class TransposeOpConversion : public ConvertToLLVMPattern { class YieldOpConversion : public ConvertToLLVMPattern { public: explicit YieldOpConversion(MLIRContext *context, LLVMTypeConverter &lowering_) - : ConvertToLLVMPattern(YieldOp::getOperationName(), context, lowering_) {} + : ConvertToLLVMPattern(linalg::YieldOp::getOperationName(), context, + lowering_) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, diff --git a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp b/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp index 34ee48758e9e6..14f365f95ee5a 100644 --- a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp +++ b/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp @@ -356,7 +356,7 @@ ParallelLowering::matchAndRewrite(ParallelOp parallelOp, // A loop is constructed with an empty "yield" terminator if there are // no results. 
rewriter.setInsertionPointToEnd(rewriter.getInsertionBlock()); - rewriter.create(loc, forOp.getResults()); + rewriter.create(loc, forOp.getResults()); } rewriter.setInsertionPointToStart(forOp.getBody()); @@ -391,7 +391,7 @@ ParallelLowering::matchAndRewrite(ParallelOp parallelOp, if (!yieldOperands.empty()) { rewriter.setInsertionPointToEnd(rewriter.getInsertionBlock()); - rewriter.create(loc, yieldOperands); + rewriter.create(loc, yieldOperands); } rewriter.replaceOp(parallelOp, loopResults); diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index fa45997ae801a..c9b05f89f30b1 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -905,7 +905,7 @@ static ParseResult parseTransposeOp(OpAsmParser &parser, // YieldOp //===----------------------------------------------------------------------===// -static void print(OpAsmPrinter &p, YieldOp op) { +static void print(OpAsmPrinter &p, linalg::YieldOp op) { p << op.getOperationName(); if (op.getNumOperands() > 0) p << ' ' << op.getOperands(); @@ -926,7 +926,8 @@ static ParseResult parseYieldOp(OpAsmParser &parser, OperationState &result) { // Check the operand number and types must match the element types of the // LinalgOp interface's shaped operands. 
-static LogicalResult verifyYield(YieldOp op, LinalgOp linalgOpInterface) { +static LogicalResult verifyYield(linalg::YieldOp op, + LinalgOp linalgOpInterface) { auto nOutputs = linalgOpInterface.getNumOutputs(); if (op.getNumOperands() != nOutputs) return op.emitOpError("expected number of yield values (") @@ -946,7 +947,7 @@ static LogicalResult verifyYield(YieldOp op, LinalgOp linalgOpInterface) { return success(); } -static LogicalResult verify(YieldOp op) { +static LogicalResult verify(linalg::YieldOp op) { auto *parentOp = op.getParentOp(); if (parentOp->getNumRegions() != 1 || parentOp->getRegion(0).empty()) return op.emitOpError("expected single non-empty parent region"); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index 6c0c841451dae..adbf4a7b80454 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -659,7 +659,7 @@ struct FuseGenericOpsOnTensors { // Add operations from producer (except the yield operation) to the fused // op. for (auto &op : producerBlock.getOperations()) { - if (auto yieldOp = dyn_cast(op)) { + if (auto yieldOp = dyn_cast(op)) { // Lookup the value the yield operation is mapped to. 
Value yieldVal = yieldOp.getOperand(0); if (Value clonedVal = mapper.lookupOrNull(yieldVal)) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index 281edd9a91f64..d4d1d108be71a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -147,7 +147,7 @@ static void inlineRegionAndEmitStore(OpType op, ArrayRef indexedValues, } Operation &terminator = block.back(); - assert(isa(terminator) && + assert(isa(terminator) && "expected a yield op in the end of the region"); for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) { IndexedValueType O(outputBuffers[i]); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index c8e20ce57842b..ada89f1c82b5c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -48,14 +48,14 @@ static bool hasMultiplyAddBody(Region &r) { auto c = m_Val(r.getArgument(2)); // TODO: Update this detection once we have matcher support for specifying // that any permutation of operands matches. 
- auto pattern1 = m_Op(m_Op(m_Op(a, b), c)); - auto pattern2 = m_Op(m_Op(c, m_Op(a, b))); - auto pattern3 = m_Op(m_Op(m_Op(b, a), c)); - auto pattern4 = m_Op(m_Op(c, m_Op(b, a))); - auto pattern5 = m_Op(m_Op(m_Op(a, b), c)); - auto pattern6 = m_Op(m_Op(c, m_Op(a, b))); - auto pattern7 = m_Op(m_Op(m_Op(b, a), c)); - auto pattern8 = m_Op(m_Op(c, m_Op(b, a))); + auto pattern1 = m_Op(m_Op(m_Op(a, b), c)); + auto pattern2 = m_Op(m_Op(c, m_Op(a, b))); + auto pattern3 = m_Op(m_Op(m_Op(b, a), c)); + auto pattern4 = m_Op(m_Op(c, m_Op(b, a))); + auto pattern5 = m_Op(m_Op(m_Op(a, b), c)); + auto pattern6 = m_Op(m_Op(c, m_Op(a, b))); + auto pattern7 = m_Op(m_Op(m_Op(b, a), c)); + auto pattern8 = m_Op(m_Op(c, m_Op(b, a))); return pattern1.match(&r.front().back()) || pattern2.match(&r.front().back()) || pattern3.match(&r.front().back()) || diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 6f3f1e4dc0d15..498246315d642 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -38,7 +38,7 @@ struct SCFInlinerInterface : public DialectInlinerInterface { // as necessary. Required when the region has only one block. 
void handleTerminator(Operation *op, ArrayRef valuesToRepl) const final { - auto retValOp = dyn_cast(op); + auto retValOp = dyn_cast(op); if (!retValOp) return; @@ -889,7 +889,7 @@ static ParseResult parseYieldOp(OpAsmParser &parser, OperationState &result) { return success(); } -static void print(OpAsmPrinter &p, YieldOp op) { +static void print(OpAsmPrinter &p, scf::YieldOp op) { p << op.getOperationName(); if (op.getNumOperands() != 0) p << ' ' << op.getOperands() << " : " << op.getOperandTypes(); @@ -899,5 +899,9 @@ static void print(OpAsmPrinter &p, YieldOp op) { // TableGen'd op method definitions //===----------------------------------------------------------------------===// +namespace mlir { +namespace scf { #define GET_OP_CLASSES #include "mlir/Dialect/SCF/SCFOps.cpp.inc" +} // namespace scf +} // namespace mlir diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index 511ec9bf2b4e1..bcfaa896f63d2 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -779,7 +779,7 @@ void SizeToIndexOp::getCanonicalizationPatterns( // YieldOp //===----------------------------------------------------------------------===// -static LogicalResult verify(YieldOp op) { +static LogicalResult verify(shape::YieldOp op) { auto *parentOp = op.getParentOp(); auto results = parentOp->getResults(); auto operands = op.getOperands(); diff --git a/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp b/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp index a84fad1f94602..ff74ce069e407 100644 --- a/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp @@ -45,7 +45,7 @@ NumElementsOpConverter::matchAndRewrite(NumElementsOp op, OpBuilder b = OpBuilder::atBlockEnd(body); Value product = b.create(loc, valueType, body->getArgument(1), body->getArgument(2)); - b.create(loc, product); + b.create(loc, product); rewriter.replaceOp(op, 
reduce.result()); return success(); diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index b34257791d78e..65f8b83d9a718 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1312,7 +1312,6 @@ Optional DimOp::getConstantIndex() { } static LogicalResult verify(DimOp op) { - // Assume unknown index to be in range. Optional index = op.getConstantIndex(); if (!index.hasValue()) @@ -1634,6 +1633,67 @@ LogicalResult DmaWaitOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// DynamicTensorFromElementsOp +//===----------------------------------------------------------------------===// + +static ParseResult parseDynamicTensorFromElementsOp(OpAsmParser &parser, + OperationState &result) { + // Parse operands. + SmallVector dynamicExtents; + Type indexTy = parser.getBuilder().getIndexType(); + if (parser.parseOperandList(dynamicExtents) || + parser.resolveOperands(dynamicExtents, indexTy, result.operands)) + return failure(); + + // Parse body. + Region *body = result.addRegion(); + if (parser.parseRegion(*body, {}, {})) + return failure(); + + // Parse result type. + Type resultType; + if (parser.parseOptionalAttrDict(result.attributes) || + parser.parseColonType(resultType)) + return failure(); + result.addTypes(resultType); + + return success(); +} + +static void print(OpAsmPrinter &p, DynamicTensorFromElementsOp op) { + p << "dynamic_tensor_from_elements " << op.dynamicExtents(); + p.printRegion(op.body()); + p.printOptionalAttrDict(op.getAttrs()); + p << " : " << op.getType(); +} + +static LogicalResult verify(DynamicTensorFromElementsOp op) { + // Ensure that the tensor type has as many dynamic dimensions as are specified + // by the operands. 
+ RankedTensorType resultTy = op.getType().cast(); + if (op.getNumOperands() != resultTy.getNumDynamicDims()) + return op.emitError("must have as many index operands as dynamic extents " + "in the result type"); + + // Ensure that region arguments span the index space. + if (!llvm::all_of(op.body().getArgumentTypes(), + [](Type ty) { return ty.isIndex(); })) + return op.emitError("all body arguments must be index"); + if (op.body().getNumArguments() != resultTy.getRank()) + return op.emitError("must have one body argument per input dimension"); + + // Ensure that the region yields an element of the right type. + auto yieldOp = + llvm::cast(op.body().getBlocks().front().getTerminator()); + if (yieldOp.value().getType() != resultTy.getElementType()) + return op.emitOpError( + "body must be terminated with a `yield` operation of the tensor " + "element type"); + + return success(); +} + //===----------------------------------------------------------------------===// // ExtractElementOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Standard/invalid.mlir b/mlir/test/Dialect/Standard/invalid.mlir index f2b71f634cd3d..7f9c564e74f3f 100644 --- a/mlir/test/Dialect/Standard/invalid.mlir +++ b/mlir/test/Dialect/Standard/invalid.mlir @@ -15,3 +15,69 @@ func @test_index_cast_tensor_error(%arg0 : tensor) -> i64 { %0 = index_cast %arg0 : tensor to i64 return %0 : i64 } + +// ----- + +func @dynamic_tensor_from_elements(%m : index) + -> tensor { + // expected-error @+1 {{must have as many index operands as dynamic extents in the result type}} + %tnsr = dynamic_tensor_from_elements %m { + ^bb0(%i : index, %j : index, %k : index): + %elem = constant 8.0 : f32 + yield %elem : f32 + } : tensor + return %tnsr : tensor +} + +// ----- + +func @dynamic_tensor_from_elements(%m : index, %n : index) + -> tensor { + // expected-error @+1 {{must have one body argument per input dimension}} + %tnsr = dynamic_tensor_from_elements 
%m, %n { + ^bb0(%i : index, %j : index): + %elem = constant 8.0 : f32 + yield %elem : f32 + } : tensor + return %tnsr : tensor +} + +// ----- + +func @dynamic_tensor_from_elements(%m : index, %n : index) + -> tensor { + // expected-error @+1 {{all body arguments must be index}} + %tnsr = dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index, %k : i64): + %elem = constant 8.0 : f32 + yield %elem : f32 + } : tensor + return %tnsr : tensor +} + +// ----- + +func @dynamic_tensor_from_elements(%m : index, %n : index) + -> tensor { + // expected-error @+2 {{op expects regions to end with 'std.yield', found 'std.return'}} + // expected-note @+1 {{in custom textual format, the absence of terminator implies 'std.yield'}} + %tnsr = dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index, %k : index): + %elem = constant 8.0 : f32 + return %elem : f32 + } : tensor + return %tnsr : tensor +} + +// ----- + +func @dynamic_tensor_from_elements(%m : index, %n : index) + -> tensor { + // expected-error @+1 {{body must be terminated with a `yield` operation of the tensor element type}} + %tnsr = dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index, %k : index): + %elem = constant 8 : i32 + yield %elem : i32 + } : tensor + return %tnsr : tensor +} diff --git a/mlir/test/Dialect/Standard/ops.mlir b/mlir/test/Dialect/Standard/ops.mlir index 24da04eebaaa6..a765acb9657b5 100644 --- a/mlir/test/Dialect/Standard/ops.mlir +++ b/mlir/test/Dialect/Standard/ops.mlir @@ -1,4 +1,5 @@ -// RUN: mlir-opt -split-input-file %s | FileCheck %s +// RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt %s --mlir-print-op-generic | mlir-opt | FileCheck %s // CHECK-LABEL: test_index_cast func @test_index_cast(%arg0 : index) -> i64 { @@ -22,3 +23,14 @@ func @assert(%arg : i1) { assert %arg, "Some message in case this assertion fails." 
return } + +func @dynamic_tensor_from_elements(%m : index, %n : index) + -> tensor { + %tnsr = dynamic_tensor_from_elements %m, %n { + ^bb0(%i : index, %j : index, %k : index): + %elem = constant 8.0 : f32 + yield %elem : f32 + } : tensor + return %tnsr : tensor +} + From 3097427f93dde9a49f729e995b8d52d91cc30d4c Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Mon, 7 Sep 2020 19:44:46 +0800 Subject: [PATCH 335/465] [obj2yaml] Add support for dumping the .debug_str section. This patch adds support for dumping the .debug_str section to obj2yaml. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D86867 --- .../tools/obj2yaml/ELF/DWARF/debug-str.yaml | 101 ++++++++++++++++++ llvm/tools/obj2yaml/elf2yaml.cpp | 33 ++++-- 2 files changed, 125 insertions(+), 9 deletions(-) create mode 100644 llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml diff --git a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml new file mode 100644 index 0000000000000..e058642877243 --- /dev/null +++ b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml @@ -0,0 +1,101 @@ +## Test how we dump the .debug_str section. + +## a) Test dumping a .debug_str section with a default section header. + +# RUN: yaml2obj --docnum=1 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefix=BASIC --implicit-check-not='Name: .debug_str' + +## b) Test dumping a .debug_str section whose section header properties are overridden. + +## Override the sh_type field. +# RUN: yaml2obj --docnum=1 -DTYPE=STRTAB %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON \ +# RUN: -DTYPE=STRTAB -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,ADDRALIGN=1 + +## Override the sh_flags field. +# RUN: yaml2obj --docnum=1 -DFLAGS=[SHF_ALLOC] %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_ALLOC ]" -D#%x,ADDRALIGN=1 + +## Override the sh_link field. 
+# RUN: yaml2obj --docnum=1 -DLINK=.sec %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON,LINK \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -DLINK=.sec -D#%x,ADDRALIGN=1 + +## Override the sh_addr field. +# RUN: yaml2obj --docnum=1 -DADDRESS=0x2020 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON,ADDRESS \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,ADDRALIGN=1 -D#%x,ADDRESS=0x2020 + +## Override the sh_addralign field. +# RUN: yaml2obj --docnum=1 -DADDRALIGN=3 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,ADDRALIGN=3 + +## Override the sh_entsize field (sh_entsize=3). +# RUN: yaml2obj --docnum=1 -DENTSIZE=3 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON,ENTSIZE \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,ADDRALIGN=1 -D#%x,ENTSIZE=3 + +## Override the sh_entsize field (sh_entsize=0). +# RUN: yaml2obj --docnum=1 -DENTSIZE=0 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON,ENTSIZE \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,ADDRALIGN=1 -D#%x,ENTSIZE=0 + +## Override the sh_info field. 
+# RUN: yaml2obj --docnum=1 -DINFO=3 %s | obj2yaml | \ +# RUN: FileCheck %s --check-prefixes=BASIC,COMMON,INFO \ +# RUN: -DTYPE=PROGBITS -DFLAGS="[ SHF_MERGE, SHF_STRINGS ]" -D#%x,INFO=3 -D#%x,ADDRALIGN=1 -D#%x,ENTSIZE=1 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .debug_str + Type: SHT_[[TYPE=PROGBITS]] + Flags: [[FLAGS=]] + Link: [[LINK='']] + EntSize: [[ENTSIZE=1]] + Info: [[INFO=]] + AddressAlign: [[ADDRALIGN=1]] + Address: [[ADDRESS=]] + - Name: .sec + Type: SHT_PROGBITS +DWARF: + debug_str: + - a + - b + - abc + +# COMMON: - Name: .debug_str +# COMMON-NEXT: Type: SHT_[[TYPE]] +# COMMON-NEXT: Flags: [[FLAGS]] +# LINK-NEXT: Link: .sec +# ADDRESS-NEXT: Address: 0x[[#%.16x,ADDRESS]] +# COMMON-NEXT: AddressAlign: 0x[[#%.16x,ADDRALIGN]] +# ENTSIZE-NEXT: EntSize: 0x[[#%.16x,ENTSIZE]] +# INFO-NEXT: Info: 0x[[#%.16x,INFO]] +# BASIC: DWARF: +# BASIC-NEXT: debug_str: +# BASIC-NEXT: - a +# BASIC-NEXT: - b +# BASIC-NEXT: - abc +# BASIC-NEXT: ... + +## c) Test dumping an empty .debug_str section. + +# RUN: yaml2obj --docnum=2 %s | obj2yaml | FileCheck %s --check-prefix=EMPTY --implicit-check-not=Sections + +# EMPTY: DWARF: +# EMPTY-NEXT: debug_str: [] +# EMPTY-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +DWARF: + debug_str: [] diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 632ec1bc9af41..9f524479bb04c 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -197,12 +197,22 @@ bool ELFDumper::shouldPrintSection(const ELFYAML::Section &S, // entry but their section headers may have special flags, entry size, address // alignment, etc. We will preserve the header for them under such // circumstances. 
- if (DWARF && DWARF->getNonEmptySectionNames().count(S.Name.substr(1))) { + StringRef SecName = S.Name.substr(1); + if (DWARF && DWARF->getNonEmptySectionNames().count(SecName)) { if (const ELFYAML::RawContentSection *RawSec = - dyn_cast(&S)) - return RawSec->Type != ELF::SHT_PROGBITS || RawSec->Flags || - !RawSec->Link.empty() || RawSec->Info || - RawSec->AddressAlign != 1 || RawSec->EntSize; + dyn_cast(&S)) { + if (RawSec->Type != ELF::SHT_PROGBITS || !RawSec->Link.empty() || + RawSec->Info || RawSec->AddressAlign != 1 || RawSec->Address || + RawSec->EntSize) + return true; + + ELFYAML::ELF_SHF ShFlags = RawSec->Flags.getValueOr(ELFYAML::ELF_SHF(0)); + + if (SecName == "debug_str") + return ShFlags != ELFYAML::ELF_SHF(ELF::SHF_MERGE | ELF::SHF_STRINGS); + + return ShFlags != 0; + } } // Normally we use "Symbols:" and "DynamicSymbols:" to describe contents of @@ -404,6 +414,8 @@ Optional ELFDumper::dumpDWARFSections( if (RawSec->Name == ".debug_aranges") Err = dumpDebugARanges(*DWARFCtx.get(), DWARF); + else if (RawSec->Name == ".debug_str") + dumpDebugStrings(*DWARFCtx.get(), DWARF); // If the DWARF section cannot be successfully parsed, emit raw content // instead of an entry in the DWARF section of the YAML. 
@@ -622,7 +634,8 @@ Error ELFDumper::dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab, } template -static unsigned getDefaultShEntSize(ELFYAML::ELF_SHT SecType) { +static unsigned getDefaultShEntSize(ELFYAML::ELF_SHT SecType, + StringRef SecName) { switch (SecType) { case ELF::SHT_REL: return sizeof(typename ELFT::Rel); @@ -633,6 +646,8 @@ static unsigned getDefaultShEntSize(ELFYAML::ELF_SHT SecType) { case ELF::SHT_DYNAMIC: return sizeof(typename ELFT::Dyn); default: + if (SecName == ".debug_str") + return 1; return 0; } } @@ -649,9 +664,6 @@ Error ELFDumper::dumpCommonSection(const Elf_Shdr *Shdr, S.Address = static_cast(Shdr->sh_addr); S.AddressAlign = Shdr->sh_addralign; - if (Shdr->sh_entsize != getDefaultShEntSize(S.Type)) - S.EntSize = static_cast(Shdr->sh_entsize); - S.OriginalSecNdx = Shdr - &Sections[0]; auto NameOrErr = getUniquedSectionName(Shdr); @@ -659,6 +671,9 @@ Error ELFDumper::dumpCommonSection(const Elf_Shdr *Shdr, return NameOrErr.takeError(); S.Name = NameOrErr.get(); + if (Shdr->sh_entsize != getDefaultShEntSize(S.Type, S.Name)) + S.EntSize = static_cast(Shdr->sh_entsize); + if (Shdr->sh_link != ELF::SHN_UNDEF) { auto LinkSection = Obj.getSection(Shdr->sh_link); if (!LinkSection) From 9b645ebfff168fcf3cf29b21f49762a04d8ceb37 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 12:50:32 +0100 Subject: [PATCH 336/465] [X86][AVX] Use lowerShuffleWithPERMV in shuffle combining to support non-VLX targets lowerShuffleWithPERMV allows us to use the ZMM variants for 128/256-bit variable shuffles on non-VLX AVX512 targets. This is another step towards shuffle combining through between vector widths - we still end up with an annoying regression (combine_vpermilvar_vperm2f128_zero_8f32) but we're going in the right direction.... 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 104 +++++++++--------- llvm/test/CodeGen/X86/insertelement-ones.ll | 10 +- .../X86/shuffle-strided-with-offset-256.ll | 52 ++++----- .../X86/shuffle-strided-with-offset-512.ll | 23 ++-- .../CodeGen/X86/vector-shuffle-512-v16.ll | 9 +- .../test/CodeGen/X86/vector-shuffle-avx512.ll | 35 +++--- .../X86/vector-shuffle-combining-avx.ll | 17 ++- .../X86/vector-shuffle-combining-avx2.ll | 29 +++-- llvm/test/CodeGen/X86/vector-zext.ll | 19 +++- 9 files changed, 164 insertions(+), 134 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 09855fd0eb925..a8a41d9a1bb75 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35240,44 +35240,48 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) { // If we have a single input lane-crossing shuffle then lower to VPERMV. - if (UnaryShuffle && AllowVariableMask && !MaskContainsZeros && - ((Subtarget.hasAVX2() && - (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || - (Subtarget.hasAVX512() && - (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || - MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasBWI() && MaskVT == MVT::v32i16) || - (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || - (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || - (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { - SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); - Res = DAG.getBitcast(MaskVT, V1); - Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res); - return DAG.getBitcast(RootVT, Res); + if (UnaryShuffle && AllowVariableMask && !MaskContainsZeros) { + if (Subtarget.hasAVX2() && + (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) { + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); + Res = DAG.getBitcast(MaskVT, V1); + Res = 
DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res); + return DAG.getBitcast(RootVT, Res); + } + // AVX512 variants (non-VLX will pad to 512-bit shuffles). + if ((Subtarget.hasAVX512() && + (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || + MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || + (Subtarget.hasBWI() && + (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || + (Subtarget.hasVBMI() && + (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8))) { + V1 = DAG.getBitcast(MaskVT, V1); + V2 = DAG.getUNDEF(MaskVT); + Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); + return DAG.getBitcast(RootVT, Res); + } } // Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero - // vector as the second source. + // vector as the second source (non-VLX will pad to 512-bit shuffles). if (UnaryShuffle && AllowVariableMask && ((Subtarget.hasAVX512() && (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || + MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || + MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasVLX() && - (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || - MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || - (Subtarget.hasBWI() && MaskVT == MVT::v32i16) || - (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || - (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || - (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { + (Subtarget.hasBWI() && + (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || + (Subtarget.hasVBMI() && + (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { // Adjust shuffle mask - replace SM_SentinelZero with second source index. 
for (unsigned i = 0; i != NumMaskElts; ++i) if (Mask[i] == SM_SentinelZero) Mask[i] = NumMaskElts + i; - - SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); - Res = DAG.getBitcast(MaskVT, V1); - SDValue Zero = getZeroVector(MaskVT, Subtarget, DAG, DL); - Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, Res, VPermMask, Zero); + V1 = DAG.getBitcast(MaskVT, V1); + V2 = getZeroVector(MaskVT, Subtarget, DAG, DL); + Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); return DAG.getBitcast(RootVT, Res); } @@ -35288,22 +35292,21 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DAG, Subtarget)) return WideShuffle; - // If we have a dual input lane-crossing shuffle then lower to VPERMV3. + // If we have a dual input lane-crossing shuffle then lower to VPERMV3, + // (non-VLX will pad to 512-bit shuffles). if (AllowVariableMask && !MaskContainsZeros && ((Subtarget.hasAVX512() && (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || - MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasVLX() && - (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || + MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || + MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 || MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || - (Subtarget.hasBWI() && MaskVT == MVT::v32i16) || - (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || - (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || - (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { - SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); + (Subtarget.hasBWI() && + (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || + (Subtarget.hasVBMI() && + (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { V1 = DAG.getBitcast(MaskVT, V1); V2 = DAG.getBitcast(MaskVT, V2); - Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2); + Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); return DAG.getBitcast(RootVT, Res); } 
return SDValue(); @@ -35460,25 +35463,22 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DAG, Subtarget)) return WideShuffle; - // If we have a dual input shuffle then lower to VPERMV3. + // If we have a dual input shuffle then lower to VPERMV3, + // (non-VLX will pad to 512-bit shuffles) if (!UnaryShuffle && AllowVariableMask && !MaskContainsZeros && ((Subtarget.hasAVX512() && - (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || - MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || - (Subtarget.hasVLX() && - (MaskVT == MVT::v2f64 || MaskVT == MVT::v2i64 || MaskVT == MVT::v4f64 || - MaskVT == MVT::v4i64 || MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || - MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || - (Subtarget.hasBWI() && MaskVT == MVT::v32i16) || - (Subtarget.hasBWI() && Subtarget.hasVLX() && - (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16)) || - (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || - (Subtarget.hasVBMI() && Subtarget.hasVLX() && - (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8)))) { - SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); + (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v8f64 || + MaskVT == MVT::v2i64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8i64 || + MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 || + MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || + MaskVT == MVT::v16i32)) || + (Subtarget.hasBWI() && (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || + MaskVT == MVT::v32i16)) || + (Subtarget.hasVBMI() && (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || + MaskVT == MVT::v64i8)))) { V1 = DAG.getBitcast(MaskVT, V1); V2 = DAG.getBitcast(MaskVT, V2); - Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2); + Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); return DAG.getBitcast(RootVT, Res); } diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll index 
6a9a401264c56..67d8479cf7365 100644 --- a/llvm/test/CodeGen/X86/insertelement-ones.ll +++ b/llvm/test/CodeGen/X86/insertelement-ones.ll @@ -299,11 +299,11 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) { ; ; AVX512F-LABEL: insert_v16i16_x12345x789ABCDEx: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7] -; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15] -; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [32,1,2,3,4,5,38,7,8,9,10,11,12,13,14,47] +; AVX512F-NEXT: vpermt2w %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: insert_v16i16_x12345x789ABCDEx: diff --git a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll index 51df4c0505b54..3f6b85c97c400 100644 --- a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll +++ b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll @@ -71,13 +71,12 @@ define void @shuffle_v16i16_to_v8i16_1(<16 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v16i16_to_v8i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [1,3,5,7,33,35,37,39] +; AVX512BW-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm2 +; 
AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v16i16_to_v8i16_1: @@ -252,13 +251,12 @@ define void @shuffle_v16i16_to_v4i16_1(<16 x i16>* %L, <4 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v16i16_to_v4i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vmovq %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [1,5,33,37,4,5,36,37] +; AVX512BW-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovq %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v16i16_to_v4i16_1: @@ -329,13 +327,12 @@ define void @shuffle_v16i16_to_v4i16_2(<16 x i16>* %L, <4 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v16i16_to_v4i16_2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vmovq %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [2,6,34,38,2,3,34,35] +; AVX512BW-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovq %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v16i16_to_v4i16_2: @@ -406,13 +403,12 @@ define void 
@shuffle_v16i16_to_v4i16_3(<16 x i16>* %L, <4 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v16i16_to_v4i16_3: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vmovq %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [3,7,35,39,2,3,34,35] +; AVX512BW-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovq %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v16i16_to_v4i16_3: diff --git a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll index e0994e5b58faf..d789f0e1d39f7 100644 --- a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll @@ -31,13 +31,13 @@ define void @shuffle_v64i8_to_v32i8_1(<64 x i8>* %L, <32 x i8>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v64i8_to_v32i8_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm1 -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u] -; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX512BW-NEXT: vmovdqa %ymm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,9,11] +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm2 = 
ymm2[u,u,u,u,u,u,u,u,1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31] +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u,17,19,21,23,25,27,29,31,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %ymm1, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -129,10 +129,11 @@ define void @shuffle_v16i32_to_v8i32_1(<16 x i32>* %L, <8 x i32>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v16i32_to_v8i32_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovaps (%rdi), %ymm0 -; AVX512BW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],mem[1,3],ymm0[5,7],mem[5,7] -; AVX512BW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX512BW-NEXT: vmovaps %ymm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,17,19,21,23] +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm2 +; AVX512BW-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %ymm1, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll index 4fce1a38a754a..7d3e8f66ed394 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -378,11 +378,10 @@ define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) { define <8 x float> @test_v16f32_0_1_2_3_4_6_7_10 (<16 x float> %v) { ; ALL-LABEL: test_v16f32_0_1_2_3_4_6_7_10: ; ALL: # %bb.0: -; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm1 -; ALL-NEXT: vmovsldup {{.*#+}} xmm1 = xmm1[0,0,2,2] -; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 -; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,u] -; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7] +; ALL-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,6,7,10,0,1,2,3,4,6,7,10] +; ALL-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] +; ALL-NEXT: vpermd %zmm0, %zmm1, %zmm0 +; ALL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; 
ALL-NEXT: retq %res = shufflevector <16 x float> %v, <16 x float> undef, <8 x i32> ret <8 x float> %res diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll index cb2dd3ef7e86d..ccf1476e6a657 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll @@ -35,11 +35,11 @@ define <8 x float> @expand1(<4 x float> %a ) { ; ; KNL-LABEL: expand1: ; KNL: # %bb.0: -; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; KNL-NEXT: vmovaps {{.*#+}} ymm1 = -; KNL-NEXT: vpermps %ymm0, %ymm1, %ymm0 -; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] +; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; KNL-NEXT: vmovaps {{.*#+}} ymm1 = [16,0,18,1,20,2,22,3] +; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; KNL-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 +; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; KNL-NEXT: ret{{[l|q]}} %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> ret <8 x float> %res @@ -268,10 +268,11 @@ define <8 x float> @expand14(<4 x float> %a) { ; ; KNL-LABEL: expand14: ; KNL: # %bb.0: -; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3] -; KNL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3] -; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7] +; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; KNL-NEXT: vmovaps {{.*#+}} ymm1 = [16,17,0,19,1,21,22,23] +; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; KNL-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 +; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; KNL-NEXT: ret{{[l|q]}} %addV = fadd <4 x float> , %res = shufflevector <4 x float> %addV, <4 x float> %a, <8 x i32> @@ -476,9 +477,11 @@ define <8 x float> @test_masked_permps_v8f32(<8 x float>* %vp, <8 x float> %vec2 ; ; KNL64-LABEL: test_masked_permps_v8f32: ; KNL64: # %bb.0: -; 
KNL64-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,2,3,7,6,6,7] -; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,0,2,3] -; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5],ymm0[6,7] +; KNL64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; KNL64-NEXT: vmovaps (%rdi), %ymm1 +; KNL64-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23] +; KNL64-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 +; KNL64-NEXT: vmovaps %ymm1, %ymm0 ; KNL64-NEXT: retq ; ; SKX32-LABEL: test_masked_permps_v8f32: @@ -492,10 +495,12 @@ define <8 x float> @test_masked_permps_v8f32(<8 x float>* %vp, <8 x float> %vec2 ; ; KNL32-LABEL: test_masked_permps_v8f32: ; KNL32: # %bb.0: +; KNL32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; KNL32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL32-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,2,3,7,6,6,7] -; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,0,2,3] -; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5],ymm0[6,7] +; KNL32-NEXT: vmovaps (%eax), %ymm1 +; KNL32-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23] +; KNL32-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 +; KNL32-NEXT: vmovaps %ymm1, %ymm0 ; KNL32-NEXT: retl %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index e744dbd103362..47c1e67e096a0 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -132,10 +132,19 @@ define <8 x float> @combine_vpermilvar_vperm2f128_8f32(<8 x float> %a0) { } define <8 x float> @combine_vpermilvar_vperm2f128_zero_8f32(<8 x float> %a0) { -; CHECK-LABEL: combine_vpermilvar_vperm2f128_zero_8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1] -; CHECK-NEXT: ret{{[l|q]}} +; AVX-LABEL: combine_vpermilvar_vperm2f128_zero_8f32: +; AVX: # %bb.0: +; AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = 
zero,zero,ymm0[0,1] +; AVX-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: combine_vpermilvar_vperm2f128_zero_8f32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [16,17,18,19,3,2,1,0] +; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 +; AVX512-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] +; AVX512-NEXT: ret{{[l|q]}} %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> ) %2 = shufflevector <8 x float> %1, <8 x float> zeroinitializer, <8 x i32> %3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> ) diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index a4d7b26ef8690..e7287162dfcb8 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -817,15 +817,26 @@ define <32 x i8> @PR27320(<8 x i32> %a0) { } define internal fastcc <8 x float> @PR34577(<8 x float> %inp0, <8 x float> %inp1, <8 x float> %inp2) { -; CHECK-LABEL: PR34577: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1] -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] -; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = -; CHECK-NEXT: vpermps %ymm1, %ymm2, %ymm1 -; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] -; CHECK-NEXT: ret{{[l|q]}} +; AVX2-LABEL: PR34577: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1] +; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = +; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] +; AVX2-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: PR34577: +; AVX512: # %bb.0: # %entry +; 
AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1] +; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7] +; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = <23,18,7,2,20,u,3,2> +; AVX512-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0 +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512-NEXT: ret{{[l|q]}} entry: %shuf0 = shufflevector <8 x float> %inp0, <8 x float> %inp2, <8 x i32> %sel = select <8 x i1> , <8 x float> %shuf0, <8 x float> zeroinitializer diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index d97b9a359b1ae..b43510f7fd194 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -1905,11 +1905,20 @@ define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX2-NEXT: retq ; -; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] -; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512-NEXT: retq +; AVX512F-LABEL: shuf_zext_8i16_to_4i64_offset2: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] +; AVX512F-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: shuf_zext_8i16_to_4i64_offset2: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [2,33,34,35,3,37,38,39,4,41,42,43,5,45,46,47] +; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vpermt2w %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed 
$ymm0 killed $zmm0 +; AVX512BW-NEXT: retq entry: %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> %Z = bitcast <16 x i16> %B to <4 x i64> From 5bb27e735d3ba561b93a12e07d79cd88a5bff338 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 12:56:27 +0100 Subject: [PATCH 337/465] X86AvoidStoreForwardingBlocks.cpp - use unsigned for Opcode values. NFCI. Fixes clang-tidy cppcoreguidelines-narrowing-conversions warnings. --- llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp index 9f1fece1b9dd8..fdc65acffe3d8 100644 --- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp +++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp @@ -154,7 +154,7 @@ static bool isPotentialBlockedMemCpyLd(unsigned Opcode) { return isXMMLoadOpcode(Opcode) || isYMMLoadOpcode(Opcode); } -static bool isPotentialBlockedMemCpyPair(int LdOpcode, int StOpcode) { +static bool isPotentialBlockedMemCpyPair(unsigned LdOpcode, unsigned StOpcode) { switch (LdOpcode) { case X86::MOVUPSrm: case X86::MOVAPSrm: @@ -206,7 +206,7 @@ static bool isPotentialBlockedMemCpyPair(int LdOpcode, int StOpcode) { } } -static bool isPotentialBlockingStoreInst(int Opcode, int LoadOpcode) { +static bool isPotentialBlockingStoreInst(unsigned Opcode, unsigned LoadOpcode) { bool PBlock = false; PBlock |= Opcode == X86::MOV64mr || Opcode == X86::MOV64mi32 || Opcode == X86::MOV32mr || Opcode == X86::MOV32mi || From 3a86eb03d54ef80cf498d9473a1c735c93cdfa66 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Fri, 4 Sep 2020 15:25:36 +0300 Subject: [PATCH 338/465] [llvm-readobj/elf] - Introduce Relocation helper. It removes templating for Elf_Rel[a] handling that we introduced earlier and introduces a helper class instead. 
It was briefly discussed in D87087, which showed, why having templates is probably not ideal for the generalization of dumpers code. Differential revision: https://reviews.llvm.org/D87141 --- llvm/tools/llvm-readobj/ELFDumper.cpp | 222 +++++++++++--------------- 1 file changed, 93 insertions(+), 129 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index ab8b546a7b764..d7312eaf2c93a 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -205,6 +205,24 @@ struct VerNeed { } // namespace +template class Relocation { +public: + Relocation(const typename ELFT::Rel &R, bool IsMips64EL) + : Type(R.getType(IsMips64EL)), Symbol(R.getSymbol(IsMips64EL)), + Offset(R.r_offset), Info(R.r_info) {} + + Relocation(const typename ELFT::Rela &R, bool IsMips64EL) + : Relocation((const typename ELFT::Rel &)R, IsMips64EL) { + Addend = R.r_addend; + } + + uint32_t Type; + uint32_t Symbol; + typename ELFT::uint Offset; + typename ELFT::uint Info; + Optional Addend; +}; + template class ELFDumper : public ObjDumper { public: ELFDumper(const object::ELFObjectFile *ObjF, ScopedPrinter &Writer); @@ -370,9 +388,8 @@ template class ELFDumper : public ObjDumper { Expected> getVersionDependencies(const Elf_Shdr *Sec) const; - template - Expected> getRelocationTarget(const Elf_Shdr *SymTab, - const RelTy &R) const; + Expected> getRelocationTarget(const Relocation &R, + const Elf_Shdr *SymTab) const; std::function WarningHandler; void reportUniqueWarning(Error Err) const; @@ -754,10 +771,8 @@ template class DumpStyle { function_ref OnSectionStart, function_ref OnSectionEntry); - virtual void printRelReloc(const Elf_Rel &R, unsigned RelIndex, - const Elf_Shdr *Sec, const Elf_Shdr *SymTab) = 0; - virtual void printRelaReloc(const Elf_Rela &R, unsigned RelIndex, - const Elf_Shdr *Sec, const Elf_Shdr *SymTab) = 0; + virtual void printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, 
const Elf_Shdr *SymTab) = 0; virtual void printRelrReloc(const Elf_Relr &R) = 0; void printRelocationsHelper(const Elf_Shdr &Sec); @@ -870,23 +885,18 @@ template class GNUStyle : public DumpStyle { } void printHashedSymbol(const Elf_Sym *FirstSym, uint32_t Sym, StringRef StrTable, uint32_t Bucket); - void printRelReloc(const Elf_Rel &R, unsigned RelIndex, const Elf_Shdr *Sec, - const Elf_Shdr *SymTab) override; - void printRelaReloc(const Elf_Rela &R, unsigned RelIndex, const Elf_Shdr *Sec, - const Elf_Shdr *SymTab) override; + void printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) override; void printRelrReloc(const Elf_Relr &R) override; - template - void printRelRelaReloc(const RelTy &R, unsigned RelIndex, const Elf_Shdr &Sec, - const Elf_Shdr *SymTab); - template - void printRelRelaReloc(const RelTy &R, const RelSymbol &RelSym); + void printRelRelaReloc(const Relocation &R, + const RelSymbol &RelSym); void printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, Optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed) override; std::string getSymbolSectionNdx(const Elf_Sym *Symbol, const Elf_Sym *FirstSym); - template void printDynamicRelocation(const RelTy &R); + void printDynamicRelocation(const Relocation &R); void printProgramHeaders(); void printSectionMapping(); void printGNUVersionSectionProlog(const typename ELFT::Shdr *Sec, @@ -938,15 +948,11 @@ template class LLVMStyle : public DumpStyle { void printMipsABIFlags(const ELFObjectFile *Obj) override; private: - void printRelReloc(const Elf_Rel &R, unsigned RelIndex, const Elf_Shdr *Sec, - const Elf_Shdr *SymTab) override; - void printRelaReloc(const Elf_Rela &R, unsigned RelIndex, const Elf_Shdr *Sec, - const Elf_Shdr *SymTab) override; + void printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) override; void printRelrReloc(const Elf_Relr &R) override; - template - void printRelRelaReloc(const RelTy &R, 
unsigned RelIndex, const Elf_Shdr &Sec, - const Elf_Shdr *SymTab); - template void printDynamicRelocation(const RelTy &Rel); + + void printDynamicRelocation(const Relocation &R); void printSymbols(); void printDynamicSymbols(); @@ -1058,12 +1064,15 @@ Expected ELFDumper::getSymbolVersion(const Elf_Sym *Sym, } template -template Expected> -ELFDumper::getRelocationTarget(const Elf_Shdr *SymTab, - const RelTy &R) const { - const ELFFile *Obj = ObjF->getELFFile(); - Expected SymOrErr = Obj->getRelocationSymbol(&R, SymTab); +ELFDumper::getRelocationTarget(const Relocation &R, + const Elf_Shdr *SymTab) const { + if (R.Symbol == 0) + return RelSymbol(nullptr, ""); + + const ELFFile &Obj = *ObjF->getELFFile(); + Expected SymOrErr = + Obj.template getEntry(SymTab, R.Symbol); if (!SymOrErr) return SymOrErr.takeError(); const Elf_Sym *Sym = *SymOrErr; @@ -1074,20 +1083,20 @@ ELFDumper::getRelocationTarget(const Elf_Shdr *SymTab, // This code block returns the section name. if (Sym->getType() == ELF::STT_SECTION) { Expected SecOrErr = - Obj->getSection(Sym, SymTab, ShndxTable); + Obj.getSection(Sym, SymTab, ShndxTable); if (!SecOrErr) return SecOrErr.takeError(); // A section symbol describes the section at index 0. 
if (*SecOrErr == nullptr) return RelSymbol(Sym, ""); - Expected NameOrErr = Obj->getSectionName(*SecOrErr); + Expected NameOrErr = Obj.getSectionName(*SecOrErr); if (!NameOrErr) return NameOrErr.takeError(); return RelSymbol(Sym, NameOrErr->str()); } - Expected StrTableOrErr = Obj->getStringTableForSymtab(*SymTab); + Expected StrTableOrErr = Obj.getStringTableForSymtab(*SymTab); if (!StrTableOrErr) return StrTableOrErr.takeError(); @@ -3607,30 +3616,10 @@ template void GNUStyle::printGroupSections() { } template -void GNUStyle::printRelReloc(const Elf_Rel &R, unsigned RelIndex, - const Elf_Shdr *Sec, - const Elf_Shdr *SymTab) { - printRelRelaReloc(R, RelIndex, *Sec, SymTab); -} - -template -void GNUStyle::printRelaReloc(const Elf_Rela &R, unsigned RelIndex, - const Elf_Shdr *Sec, - const Elf_Shdr *SymTab) { - printRelRelaReloc(R, RelIndex, *Sec, SymTab); -} - -template void GNUStyle::printRelrReloc(const Elf_Relr &R) { - OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 16 : 8)) << "\n"; -} - -template -template -void GNUStyle::printRelRelaReloc(const RelTy &R, unsigned RelIndex, - const Elf_Shdr &Sec, - const Elf_Shdr *SymTab) { +void GNUStyle::printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { Expected> Target = - this->dumper()->getRelocationTarget(SymTab, R); + this->dumper()->getRelocationTarget(R, SymTab); if (!Target) this->reportUniqueWarning(createError( "unable to print relocation " + Twine(RelIndex) + " in " + @@ -3639,30 +3628,23 @@ void GNUStyle::printRelRelaReloc(const RelTy &R, unsigned RelIndex, printRelRelaReloc(R, *Target); } -template -static Optional getAddend(const typename ELFT::Rela &R) { - return (int64_t)R.r_addend; -} - -template -static Optional getAddend(const typename ELFT::Rel &) { - return None; +template void GNUStyle::printRelrReloc(const Elf_Relr &R) { + OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 
16 : 8)) << "\n"; } template -template -void GNUStyle::printRelRelaReloc(const RelTy &R, +void GNUStyle::printRelRelaReloc(const Relocation &R, const RelSymbol &RelSym) { // First two fields are bit width dependent. The rest of them are fixed width. unsigned Bias = ELFT::Is64Bits ? 8 : 0; Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias}; unsigned Width = ELFT::Is64Bits ? 16 : 8; - Fields[0].Str = to_string(format_hex_no_prefix(R.r_offset, Width)); - Fields[1].Str = to_string(format_hex_no_prefix(R.r_info, Width)); + Fields[0].Str = to_string(format_hex_no_prefix(R.Offset, Width)); + Fields[1].Str = to_string(format_hex_no_prefix(R.Info, Width)); SmallString<32> RelocName; - this->Obj.getRelocationTypeName(R.getType(this->Obj.isMips64EL()), RelocName); + this->Obj.getRelocationTypeName(R.Type, RelocName); Fields[2].Str = RelocName.c_str(); if (RelSym.Sym) @@ -3674,7 +3656,7 @@ void GNUStyle::printRelRelaReloc(const RelTy &R, printField(F); std::string Addend; - if (Optional A = getAddend(R)) { + if (Optional A = R.Addend) { int64_t RelAddend = *A; if (!RelSym.Name.empty()) { if (RelAddend < 0) { @@ -4357,16 +4339,15 @@ template void GNUStyle::printSectionMapping() { namespace { -template +template RelSymbol getSymbolForReloc(const ELFFile &Obj, StringRef FileName, const ELFDumper *Dumper, - const RelTy &Reloc) { - uint32_t SymIndex = Reloc.getSymbol(Obj.isMips64EL()); + const Relocation &Reloc) { auto WarnAndReturn = [&](const typename ELFT::Sym *Sym, const Twine &Reason) -> RelSymbol { reportWarning( createError("unable to get name of the dynamic symbol with index " + - Twine(SymIndex) + ": " + Reason), + Twine(Reloc.Symbol) + ": " + Reason), FileName); return {Sym, ""}; }; @@ -4379,13 +4360,13 @@ RelSymbol getSymbolForReloc(const ELFFile &Obj, StringRef FileName, // We might have an object without a section header. In this case the size of // Symbols is zero, because there is no way to know the size of the dynamic // table. 
We should allow this case and not print a warning. - if (!Symbols.empty() && SymIndex >= Symbols.size()) + if (!Symbols.empty() && Reloc.Symbol >= Symbols.size()) return WarnAndReturn( nullptr, "index is greater than or equal to the number of dynamic symbols (" + Twine(Symbols.size()) + ")"); - const typename ELFT::Sym *Sym = FirstSym + SymIndex; + const typename ELFT::Sym *Sym = FirstSym + Reloc.Symbol; Expected ErrOrName = Sym->getName(Dumper->getDynamicStringTable()); if (!ErrOrName) return WarnAndReturn(Sym, toString(ErrOrName.takeError())); @@ -4395,8 +4376,7 @@ RelSymbol getSymbolForReloc(const ELFFile &Obj, StringRef FileName, } // namespace template -template -void GNUStyle::printDynamicRelocation(const RelTy &R) { +void GNUStyle::printDynamicRelocation(const Relocation &R) { printRelRelaReloc( R, getSymbolForReloc(this->Obj, this->FileName, this->dumper(), R)); } @@ -4448,34 +4428,35 @@ template void GNUStyle::printDynamicRelocations() { const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion(); const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion(); const DynRegionInfo &DynPLTRelRegion = this->dumper()->getDynPLTRelRegion(); + const bool IsMips64EL = this->Obj.isMips64EL(); if (DynRelaRegion.Size > 0) { printDynamicRelocHeader(this->Obj, OS, ELF::SHT_RELA, "RELA", DynRelaRegion); for (const Elf_Rela &Rela : this->dumper()->dyn_relas()) - printDynamicRelocation(Rela); + printDynamicRelocation(Relocation(Rela, IsMips64EL)); } if (DynRelRegion.Size > 0) { printDynamicRelocHeader(this->Obj, OS, ELF::SHT_REL, "REL", DynRelRegion); for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) - printDynamicRelocation(Rel); + printDynamicRelocation(Relocation(Rel, IsMips64EL)); } if (DynRelrRegion.Size > 0) { printDynamicRelocHeader(this->Obj, OS, ELF::SHT_REL, "RELR", DynRelrRegion); Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); for (const Elf_Rel &R : this->Obj.decode_relrs(Relrs)) - printDynamicRelocation(R); + 
printDynamicRelocation(Relocation(R, IsMips64EL)); } if (DynPLTRelRegion.Size) { if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) { printDynamicRelocHeader(this->Obj, OS, ELF::SHT_RELA, "PLT", DynPLTRelRegion); for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Rela); + printDynamicRelocation(Relocation(Rela, IsMips64EL)); } else { printDynamicRelocHeader(this->Obj, OS, ELF::SHT_REL, "PLT", DynPLTRelRegion); for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Rel); + printDynamicRelocation(Relocation(Rel, IsMips64EL)); } } } @@ -5471,11 +5452,12 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { } unsigned RelNdx = 0; + const bool IsMips64EL = this->Obj.isMips64EL(); switch (Sec.sh_type) { case ELF::SHT_REL: if (Expected RangeOrErr = Obj.rels(&Sec)) { for (const Elf_Rel &R : *RangeOrErr) - printRelReloc(R, ++RelNdx, &Sec, SymTab); + printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, SymTab); } else { Warn(RangeOrErr.takeError()); } @@ -5483,7 +5465,7 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { case ELF::SHT_RELA: if (Expected RangeOrErr = Obj.relas(&Sec)) { for (const Elf_Rela &R : *RangeOrErr) - printRelaReloc(R, ++RelNdx, &Sec, SymTab); + printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, SymTab); } else { Warn(RangeOrErr.takeError()); } @@ -5502,14 +5484,15 @@ void DumpStyle::printRelocationsHelper(const Elf_Shdr &Sec) { } for (const Elf_Rel &R : Obj.decode_relrs(*RangeOrErr)) - printRelReloc(R, ++RelNdx, &Sec, /*SymTab=*/nullptr); + printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, + /*SymTab=*/nullptr); break; } case ELF::SHT_ANDROID_REL: case ELF::SHT_ANDROID_RELA: if (Expected> RelasOrErr = Obj.android_relas(&Sec)) { for (const Elf_Rela &R : *RelasOrErr) - printRelaReloc(R, ++RelNdx, &Sec, SymTab); + printReloc(Relocation(R, IsMips64EL), ++RelNdx, Sec, SymTab); } else { Warn(RelasOrErr.takeError()); } @@ -6148,31 +6131,15 @@ template void 
LLVMStyle::printRelocations() { } } -template -void LLVMStyle::printRelReloc(const Elf_Rel &R, unsigned RelIndex, - const Elf_Shdr *Sec, - const Elf_Shdr *SymTab) { - printRelRelaReloc(R, RelIndex, *Sec, SymTab); -} - -template -void LLVMStyle::printRelaReloc(const Elf_Rela &R, unsigned RelIndex, - const Elf_Shdr *Sec, - const Elf_Shdr *SymTab) { - printRelRelaReloc(R, RelIndex, *Sec, SymTab); -} - template void LLVMStyle::printRelrReloc(const Elf_Relr &R) { W.startLine() << W.hex(R) << "\n"; } template -template -void LLVMStyle::printRelRelaReloc(const RelTy &Rel, unsigned RelIndex, - const Elf_Shdr &Sec, - const Elf_Shdr *SymTab) { +void LLVMStyle::printReloc(const Relocation &R, unsigned RelIndex, + const Elf_Shdr &Sec, const Elf_Shdr *SymTab) { Expected> Target = - this->dumper()->getRelocationTarget(SymTab, Rel); + this->dumper()->getRelocationTarget(R, SymTab); if (!Target) { this->reportUniqueWarning(createError( "unable to print relocation " + Twine(RelIndex) + " in " + @@ -6182,20 +6149,18 @@ void LLVMStyle::printRelRelaReloc(const RelTy &Rel, unsigned RelIndex, std::string TargetName = Target->Name; SmallString<32> RelocName; - this->Obj.getRelocationTypeName(Rel.getType(this->Obj.isMips64EL()), - RelocName); + this->Obj.getRelocationTypeName(R.Type, RelocName); - uintX_t Addend = getAddend(Rel).getValueOr(0); + uintX_t Addend = R.Addend.getValueOr(0); if (opts::ExpandRelocs) { DictScope Group(W, "Relocation"); - W.printHex("Offset", Rel.r_offset); - W.printNumber("Type", RelocName, (int)Rel.getType(this->Obj.isMips64EL())); - W.printNumber("Symbol", !TargetName.empty() ? TargetName : "-", - Rel.getSymbol(this->Obj.isMips64EL())); + W.printHex("Offset", R.Offset); + W.printNumber("Type", RelocName, R.Type); + W.printNumber("Symbol", !TargetName.empty() ? 
TargetName : "-", R.Symbol); W.printHex("Addend", Addend); } else { raw_ostream &OS = W.startLine(); - OS << W.hex(Rel.r_offset) << " " << RelocName << " " + OS << W.hex(R.Offset) << " " << RelocName << " " << (!TargetName.empty() ? TargetName : "-") << " " << W.hex(Addend) << "\n"; } @@ -6383,53 +6348,52 @@ template void LLVMStyle::printDynamicRelocations() { const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion(); const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion(); const DynRegionInfo &DynPLTRelRegion = this->dumper()->getDynPLTRelRegion(); + const bool IsMips64EL = this->Obj.isMips64EL(); W.startLine() << "Dynamic Relocations {\n"; W.indent(); if (DynRelaRegion.Size > 0) { for (const Elf_Rela &Rela : this->dumper()->dyn_relas()) - printDynamicRelocation(Rela); + printDynamicRelocation(Relocation(Rela, IsMips64EL)); } if (DynRelRegion.Size > 0) { for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) - printDynamicRelocation(Rel); + printDynamicRelocation(Relocation(Rel, IsMips64EL)); } if (DynRelrRegion.Size > 0) { Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - for (const Elf_Rel &R : this->Obj.decode_relrs(Relrs)) - printDynamicRelocation(R); + for (const Elf_Rel &Rel : this->Obj.decode_relrs(Relrs)) + printDynamicRelocation(Relocation(Rel, IsMips64EL)); } if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Rela); + printDynamicRelocation(Relocation(Rela, IsMips64EL)); else for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Rel); + printDynamicRelocation(Relocation(Rel, IsMips64EL)); W.unindent(); W.startLine() << "}\n"; } template -template -void LLVMStyle::printDynamicRelocation(const RelTy &Rel) { +void LLVMStyle::printDynamicRelocation(const Relocation &R) { SmallString<32> RelocName; - this->Obj.getRelocationTypeName(Rel.getType(this->Obj.isMips64EL()), - RelocName); + 
this->Obj.getRelocationTypeName(R.Type, RelocName); std::string SymbolName = - getSymbolForReloc(this->Obj, this->FileName, this->dumper(), Rel).Name; + getSymbolForReloc(this->Obj, this->FileName, this->dumper(), R).Name; - uintX_t Addend = getAddend(Rel).getValueOr(0); + uintX_t Addend = R.Addend.getValueOr(0); if (opts::ExpandRelocs) { DictScope Group(W, "Relocation"); - W.printHex("Offset", Rel.r_offset); - W.printNumber("Type", RelocName, (int)Rel.getType(this->Obj.isMips64EL())); + W.printHex("Offset", R.Offset); + W.printNumber("Type", RelocName, R.Type); W.printString("Symbol", !SymbolName.empty() ? SymbolName : "-"); W.printHex("Addend", Addend); } else { raw_ostream &OS = W.startLine(); - OS << W.hex(Rel.r_offset) << " " << RelocName << " " + OS << W.hex(R.Offset) << " " << RelocName << " " << (!SymbolName.empty() ? SymbolName : "-") << " " << W.hex(Addend) << "\n"; } From e57cbcbdc18adcadc6c97ff4f3f81b0f4b81c698 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 13:10:55 +0100 Subject: [PATCH 339/465] LegalizeTypes.h - remove orphan SplitVSETCC declaration. NFCI. 
The implementation no longer exists --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e8cc916593fbc..34c563672753d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -986,8 +986,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVSETCC(const SDNode *N); - //===--------------------------------------------------------------------===// // Generic Expansion: LegalizeTypesGeneric.cpp //===--------------------------------------------------------------------===// From 15acdd75439b402e993ebe0dbf8eb02e9b88bbdc Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Mon, 7 Sep 2020 12:09:43 +0000 Subject: [PATCH 340/465] [MLIR][Shape] Merge `shape` to `std`/`scf` lowerings. Merge the two lowering passes because they are not useful by themselves. The new pass lowers to `std` and `scf` is considered an auxiliary dialect. 
See also https://llvm.discourse.group/t/conversions-with-multiple-target-dialects/1541/12 Differential Revision: https://reviews.llvm.org/D86779 --- mlir/include/mlir/Conversion/Passes.h | 1 - mlir/include/mlir/Conversion/Passes.td | 12 +- .../mlir/Conversion/ShapeToSCF/ShapeToSCF.h | 27 -- mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt | 19 - mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp | 337 --------------- .../ShapeToStandard/ShapeToStandard.cpp | 391 +++++++++++++++--- .../Conversion/ShapeToSCF/shape-to-scf.mlir | 132 ------ .../ShapeToStandard/shape-to-standard.mlir | 211 ++++++++-- 8 files changed, 497 insertions(+), 633 deletions(-) delete mode 100644 mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h delete mode 100644 mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt delete mode 100644 mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp delete mode 100644 mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h index 5dd10932981ba..b04498598b290 100644 --- a/mlir/include/mlir/Conversion/Passes.h +++ b/mlir/include/mlir/Conversion/Passes.h @@ -23,7 +23,6 @@ #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h" #include "mlir/Conversion/SCFToStandard/SCFToStandard.h" #include "mlir/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVMPass.h" -#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" #include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.h" diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 1b27a7308c7a0..d4b478dbf4ed0 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -239,17 +239,7 @@ def ConvertShapeToStandard : Pass<"convert-shape-to-std", "ModuleOp"> { let summary = "Convert operations from the shape dialect into the standard " "dialect"; let 
constructor = "mlir::createConvertShapeToStandardPass()"; - let dependentDialects = ["StandardOpsDialect"]; -} - -//===----------------------------------------------------------------------===// -// ShapeToSCF -//===----------------------------------------------------------------------===// - -def ConvertShapeToSCF : FunctionPass<"convert-shape-to-scf"> { - let summary = "Convert operations from the shape dialect to the SCF dialect"; - let constructor = "mlir::createConvertShapeToSCFPass()"; - let dependentDialects = ["scf::SCFDialect"]; + let dependentDialects = ["StandardOpsDialect", "scf::SCFDialect"]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h b/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h deleted file mode 100644 index f953f6e2ddf10..0000000000000 --- a/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h +++ /dev/null @@ -1,27 +0,0 @@ -//===- ShapeToSCF.h - Conversion utils from Shape to SCF dialect ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ -#define MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ - -#include - -namespace mlir { - -class MLIRContext; -class FunctionPass; -class OwningRewritePatternList; - -void populateShapeToSCFConversionPatterns(OwningRewritePatternList &patterns, - MLIRContext *ctx); - -std::unique_ptr createConvertShapeToSCFPass(); - -} // namespace mlir - -#endif // MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ diff --git a/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt b/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt deleted file mode 100644 index 60dd2b8514da4..0000000000000 --- a/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -add_mlir_conversion_library(MLIRShapeToSCF - ShapeToSCF.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/ShapeToSCF - - DEPENDS - MLIRConversionPassIncGen - - LINK_COMPONENTS - Core - - LINK_LIBS PUBLIC - MLIRIR - MLIRShape - MLIRPass - MLIRSCF - MLIRTransforms - ) diff --git a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp deleted file mode 100644 index ae326c5c513e6..0000000000000 --- a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp +++ /dev/null @@ -1,337 +0,0 @@ -//===- ShapeToSCF.cpp - conversion from Shape to SCF dialect --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" - -#include "../PassDetail.h" -#include "mlir/Dialect/SCF/SCF.h" -#include "mlir/Dialect/Shape/IR/Shape.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/IR/BlockAndValueMapping.h" -#include "mlir/Transforms/DialectConversion.h" - -using namespace mlir; -using namespace mlir::shape; -using namespace mlir::scf; - -namespace { -struct BroadcastOpConverter : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(BroadcastOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult BroadcastOpConverter::matchAndRewrite( - BroadcastOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands, not - // on shapes. - if (op.getType().isa()) - return failure(); - - assert(!op.lhs().getType().isa() && - !op.rhs().getType().isa()); - auto loc = op.getLoc(); - BroadcastOp::Adaptor transformed(operands); - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - - // Find smaller and greater rank and extent tensor. 
- Value lhsRank = rewriter.create(loc, transformed.lhs(), zero); - Value rhsRank = rewriter.create(loc, transformed.rhs(), zero); - Value lhsSmaller = - rewriter.create(loc, CmpIPredicate::ule, lhsRank, rhsRank); - Type indexTy = rewriter.getIndexType(); - Type extentTensorTy = op.getType(); - auto ifOp = rewriter.create( - loc, TypeRange{indexTy, extentTensorTy, indexTy, extentTensorTy}, - lhsSmaller, - [&](OpBuilder &b, Location loc) { - b.create(loc, ValueRange{lhsRank, transformed.lhs(), - rhsRank, transformed.rhs()}); - }, - [&](OpBuilder &b, Location loc) { - b.create(loc, ValueRange{rhsRank, transformed.rhs(), - lhsRank, transformed.lhs()}); - }); - Value smallerRank = ifOp.getResult(0); - Value smallerOperand = ifOp.getResult(1); - Value greaterRank = ifOp.getResult(2); - Value greaterOperand = ifOp.getResult(3); - - // Allocate stack memory for the broadcasted extent tensor. - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create(loc, memTy, ValueRange{greaterRank}); - - // Copy extents from greater operand that are not challenged. - Value rankDiff = - rewriter.create(loc, indexTy, greaterRank, smallerRank); - rewriter.create(loc, zero, rankDiff, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange) { - Value extent = b.create( - loc, greaterOperand, ValueRange{iv}); - b.create(loc, extent, mem, ValueRange{iv}); - b.create(loc); - }); - - // Determine remaining broadcasted extents. 
- rewriter.create( - loc, rankDiff, greaterRank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange) { - Value greaterOperandExtent = - b.create(loc, greaterOperand, ValueRange{iv}); - Value greaterOperandExtentIsOne = - b.create(loc, CmpIPredicate::eq, greaterOperandExtent, one); - auto ifOp = b.create( - loc, TypeRange{indexTy}, greaterOperandExtentIsOne, - [&](OpBuilder &b, Location loc) { - Value ivShifted = b.create(loc, indexTy, iv, rankDiff); - Value smallerOperandExtent = b.create( - loc, smallerOperand, ValueRange{ivShifted}); - b.create(loc, smallerOperandExtent); - }, - [&](OpBuilder &b, Location loc) { - b.create(loc, greaterOperandExtent); - }); - Value extent = ifOp.getResult(0); - b.create(loc, extent, mem, ValueRange{iv}); - b.create(loc); - }); - - // Load broadcasted shape as an extent tensor. - rewriter.replaceOpWithNewOp(op, mem); - return success(); -} - -namespace { -/// Converts `shape.shape_eq` to an `scf.for` loop. For now, the lowering is -/// only defined on `tensor` operands. The test for equality first -/// compares their size and, if equal, checks every extent for equality. 
-/// -/// Example: -/// -/// %result = shape.shape_eq %a, %b : tensor, tensor -/// -/// becomes -/// -/// %c0 = constant 0 : index -/// %0 = dim %arg0, %c0 : tensor -/// %1 = dim %arg1, %c0 : tensor -/// %2 = cmpi "eq", %0, %1 : index -/// %result = scf.if %2 -> (i1) { -/// %c1 = constant 1 : index -/// %true = constant true -/// %4 = scf.for %arg2 = %c0 to %0 step %c1 iter_args(%arg3 = %true) -> (i1) { -/// %5 = extract_element %arg0[%arg2] : tensor -/// %6 = extract_element %arg1[%arg2] : tensor -/// %7 = cmpi "eq", %5, %6 : index -/// %8 = and %arg3, %7 : i1 -/// scf.yield %8 : i1 -/// } -/// scf.yield %4 : i1 -/// } else { -/// %false = constant false -/// scf.yield %false : i1 -/// } -/// -struct ShapeEqOpConverter : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ShapeEqOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult -ShapeEqOpConverter::matchAndRewrite(ShapeEqOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands, not - // on shapes. 
- if (op.lhs().getType().isa() || - op.rhs().getType().isa()) { - return failure(); - } - - ShapeEqOp::Adaptor transformed(operands); - auto loc = op.getLoc(); - Type indexTy = rewriter.getIndexType(); - Value zero = rewriter.create(loc, 0); - Value lhsRank = rewriter.create(loc, indexTy, transformed.lhs(), zero); - Value rhsRank = rewriter.create(loc, indexTy, transformed.rhs(), zero); - Value eqRank = - rewriter.create(loc, CmpIPredicate::eq, lhsRank, rhsRank); - Type i1Ty = rewriter.getI1Type(); - rewriter.replaceOpWithNewOp( - op, i1Ty, eqRank, - [&](OpBuilder &b, Location loc) { - Value one = b.create(loc, 1); - Value init = b.create(loc, i1Ty, b.getBoolAttr(true)); - auto loop = b.create( - loc, zero, lhsRank, one, ValueRange{init}, - [&](OpBuilder &b, Location nestedLoc, Value iv, ValueRange args) { - Value conj = args[0]; - Value lhsExtent = - b.create(loc, transformed.lhs(), iv); - Value rhsExtent = - b.create(loc, transformed.rhs(), iv); - Value eqExtent = b.create(loc, CmpIPredicate::eq, - lhsExtent, rhsExtent); - Value conjNext = b.create(loc, conj, eqExtent); - b.create(loc, ValueRange({conjNext})); - }); - b.create(loc, loop.getResults()); - }, - [&](OpBuilder &b, Location loc) { - Value result = b.create(loc, i1Ty, b.getBoolAttr(false)); - b.create(loc, result); - }); - return success(); -} - -namespace { -/// Converts `shape.reduce` to `scf.for`. -struct ReduceOpConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(shape::ReduceOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final; -}; -} // namespace - -LogicalResult -ReduceOpConverter::matchAndRewrite(shape::ReduceOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands. 
- if (op.shape().getType().isa()) - return failure(); - - auto loc = op.getLoc(); - shape::ReduceOp::Adaptor transformed(operands); - - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - Type indexTy = rewriter.getIndexType(); - Value rank = rewriter.create(loc, indexTy, transformed.shape(), zero); - - auto loop = rewriter.create( - loc, zero, rank, one, op.initVals(), - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value extent = b.create(loc, transformed.shape(), iv); - - SmallVector mappedValues{iv, extent}; - mappedValues.append(args.begin(), args.end()); - - BlockAndValueMapping mapping; - Block *reduceBody = op.getBody(); - mapping.map(reduceBody->getArguments(), mappedValues); - for (auto &nested : reduceBody->without_terminator()) - b.clone(nested, mapping); - - SmallVector mappedResults; - for (auto result : reduceBody->getTerminator()->getOperands()) - mappedResults.push_back(mapping.lookup(result)); - b.create(loc, mappedResults); - }); - - rewriter.replaceOp(op, loop.getResults()); - return success(); -} - -namespace { -/// Converts `shape_of` to for loop for unranked tensors. -class ShapeOfOpConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ShapeOfOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult -ShapeOfOpConverter::matchAndRewrite(ShapeOfOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering supports only error-free arguments. - if (op.getType().isa()) - return failure(); - - // For ranked tensors `shape_of` lowers to `std` and the pattern can be - // found in the corresponding pass. - ShapeOfOp::Adaptor transformed(operands); - Value arg = transformed.arg(); - Type argTy = arg.getType(); - if (argTy.isa()) - return failure(); - - // Allocate stack memory. 
- auto loc = op.getLoc(); - Value rank = rewriter.create(loc, arg); - Type indexTy = rewriter.getIndexType(); - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create(loc, memTy, ValueRange{rank}); - - // Copy shape extents to stack-allocated memory. - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - rewriter.create( - loc, zero, rank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value dim = rewriter.create(loc, arg, iv); - rewriter.create(loc, dim, mem, ValueRange{iv}); - rewriter.create(loc); - }); - - // Load extents to tensor value. - rewriter.replaceOpWithNewOp(op.getOperation(), mem); - return success(); -} - -namespace { -struct ConvertShapeToSCFPass - : public ConvertShapeToSCFBase { - void runOnFunction() override; -}; -} // namespace - -void ConvertShapeToSCFPass::runOnFunction() { - MLIRContext &ctx = getContext(); - - // Populate conversion patterns. - OwningRewritePatternList patterns; - populateShapeToSCFConversionPatterns(patterns, &ctx); - - // Setup target legality. - ConversionTarget target(getContext()); - target.addLegalDialect(); - - // Apply conversion. 
- if (failed(applyPartialConversion(getFunction(), target, patterns))) - signalPassFailure(); -} - -void mlir::populateShapeToSCFConversionPatterns( - OwningRewritePatternList &patterns, MLIRContext *ctx) { - // clang-format off - patterns.insert< - BroadcastOpConverter, - ShapeEqOpConverter, - ReduceOpConverter, - ShapeOfOpConverter>(ctx); - // clang-format on -} - -std::unique_ptr mlir::createConvertShapeToSCFPass() { - return std::make_unique(); -} diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp index e92bb83d4f424..8c917e08f942c 100644 --- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp +++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp @@ -12,10 +12,12 @@ #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/Shape/IR/Shape.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/BlockAndValueMapping.h" #include "mlir/Transforms/DialectConversion.h" using namespace mlir; using namespace mlir::shape; +using namespace mlir::scf; /// Conversion patterns. 
namespace { @@ -63,67 +65,94 @@ class BinaryOpConversion : public OpConversionPattern { } // namespace namespace { -class ConstSizeOpConversion : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ConstSizeOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp(op, op.value().getSExtValue()); - return success(); - } -}; -} // namespace - -namespace { -class ShapeOfOpConversion : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; +struct BroadcastOpConverter : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(ShapeOfOp op, ArrayRef operands, + matchAndRewrite(BroadcastOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override; }; } // namespace -LogicalResult ShapeOfOpConversion::matchAndRewrite( - ShapeOfOp op, ArrayRef operands, +LogicalResult BroadcastOpConverter::matchAndRewrite( + BroadcastOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const { - - // For now, only error-free types are supported by this lowering. + // For now, this lowering is only defined on `tensor` operands, not + // on shapes. if (op.getType().isa()) return failure(); - // For unranked tensors `shape_of` lowers to `scf` and the pattern can be - // found in the corresponding pass. - ShapeOfOp::Adaptor transformed(operands); - Value tensorVal = transformed.arg(); - Type tensorTy = tensorVal.getType(); - if (tensorTy.isa()) - return failure(); - - // Build values for individual dimensions. 
- SmallVector dimValues; - RankedTensorType rankedTensorTy = tensorTy.cast(); - int64_t rank = rankedTensorTy.getRank(); + assert(!op.lhs().getType().isa() && + !op.rhs().getType().isa()); auto loc = op.getLoc(); - for (int64_t i = 0; i < rank; i++) { - if (rankedTensorTy.isDynamicDim(i)) { - Value dimVal = rewriter.create(loc, tensorVal, i); - dimValues.push_back(dimVal); - } else { - int64_t dim = rankedTensorTy.getDimSize(i); - Value dimVal = rewriter.create(loc, dim); - dimValues.push_back(dimVal); - } - } - - // Materialize extent tensor. - Value staticExtentTensor = - rewriter.create(loc, dimValues); - rewriter.replaceOpWithNewOp(op, staticExtentTensor, - op.getType()); + BroadcastOp::Adaptor transformed(operands); + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + + // Find smaller and greater rank and extent tensor. + Value lhsRank = rewriter.create(loc, transformed.lhs(), zero); + Value rhsRank = rewriter.create(loc, transformed.rhs(), zero); + Value lhsSmaller = + rewriter.create(loc, CmpIPredicate::ule, lhsRank, rhsRank); + Type indexTy = rewriter.getIndexType(); + Type extentTensorTy = op.getType(); + auto ifOp = rewriter.create( + loc, TypeRange{indexTy, extentTensorTy, indexTy, extentTensorTy}, + lhsSmaller, + [&](OpBuilder &b, Location loc) { + b.create(loc, ValueRange{lhsRank, transformed.lhs(), + rhsRank, transformed.rhs()}); + }, + [&](OpBuilder &b, Location loc) { + b.create(loc, ValueRange{rhsRank, transformed.rhs(), + lhsRank, transformed.lhs()}); + }); + Value smallerRank = ifOp.getResult(0); + Value smallerOperand = ifOp.getResult(1); + Value greaterRank = ifOp.getResult(2); + Value greaterOperand = ifOp.getResult(3); + + // Allocate stack memory for the broadcasted extent tensor. + Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); + Value mem = rewriter.create(loc, memTy, ValueRange{greaterRank}); + + // Copy extents from greater operand that are not challenged. 
+ Value rankDiff = + rewriter.create(loc, indexTy, greaterRank, smallerRank); + rewriter.create(loc, zero, rankDiff, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange) { + Value extent = b.create( + loc, greaterOperand, ValueRange{iv}); + b.create(loc, extent, mem, ValueRange{iv}); + b.create(loc); + }); + + // Determine remaining broadcasted extents. + rewriter.create( + loc, rankDiff, greaterRank, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange) { + Value greaterOperandExtent = + b.create(loc, greaterOperand, ValueRange{iv}); + Value greaterOperandExtentIsOne = + b.create(loc, CmpIPredicate::eq, greaterOperandExtent, one); + auto ifOp = b.create( + loc, TypeRange{indexTy}, greaterOperandExtentIsOne, + [&](OpBuilder &b, Location loc) { + Value ivShifted = b.create(loc, indexTy, iv, rankDiff); + Value smallerOperandExtent = b.create( + loc, smallerOperand, ValueRange{ivShifted}); + b.create(loc, smallerOperandExtent); + }, + [&](OpBuilder &b, Location loc) { + b.create(loc, greaterOperandExtent); + }); + Value extent = ifOp.getResult(0); + b.create(loc, extent, mem, ValueRange{iv}); + b.create(loc); + }); + + // Load broadcasted shape as an extent tensor. 
+ rewriter.replaceOpWithNewOp(op, mem); return success(); } @@ -161,26 +190,23 @@ LogicalResult ConstShapeOpConverter::matchAndRewrite( } namespace { -class ToExtentTensorOpConversion - : public OpConversionPattern { +class ConstSizeOpConversion : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(ToExtentTensorOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - ToExtentTensorOpAdaptor adaptor(operands); - - if (!adaptor.input().getType().isa()) - return rewriter.notifyMatchFailure(op, "input needs to be a tensor"); - - rewriter.replaceOpWithNewOp(op, adaptor.input(), - op.getType()); - return success(); - } + matchAndRewrite(ConstSizeOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; }; } // namespace +LogicalResult ConstSizeOpConversion::matchAndRewrite( + ConstSizeOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + rewriter.replaceOpWithNewOp(op, op.value().getSExtValue()); + return success(); +} + namespace { class GetExtentOpConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -239,6 +265,236 @@ RankOpConverter::matchAndRewrite(shape::RankOp op, ArrayRef operands, return success(); } +namespace { +/// Converts `shape.reduce` to `scf.for`. +struct ReduceOpConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(shape::ReduceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final; +}; +} // namespace + +LogicalResult +ReduceOpConverter::matchAndRewrite(shape::ReduceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering is only defined on `tensor` operands. 
+ if (op.shape().getType().isa()) + return failure(); + + auto loc = op.getLoc(); + shape::ReduceOp::Adaptor transformed(operands); + + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + Type indexTy = rewriter.getIndexType(); + Value rank = rewriter.create(loc, indexTy, transformed.shape(), zero); + + auto loop = rewriter.create( + loc, zero, rank, one, op.initVals(), + [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { + Value extent = b.create(loc, transformed.shape(), iv); + + SmallVector mappedValues{iv, extent}; + mappedValues.append(args.begin(), args.end()); + + BlockAndValueMapping mapping; + Block *reduceBody = op.getBody(); + mapping.map(reduceBody->getArguments(), mappedValues); + for (auto &nested : reduceBody->without_terminator()) + b.clone(nested, mapping); + + SmallVector mappedResults; + for (auto result : reduceBody->getTerminator()->getOperands()) + mappedResults.push_back(mapping.lookup(result)); + b.create(loc, mappedResults); + }); + + rewriter.replaceOp(op, loop.getResults()); + return success(); +} + +namespace { +/// Converts `shape.shape_eq` to an `scf.for` loop. For now, the lowering is +/// only defined on `tensor` operands. The test for equality first +/// compares their size and, if equal, checks every extent for equality. 
+/// +/// Example: +/// +/// %result = shape.shape_eq %a, %b : tensor, tensor +/// +/// becomes +/// +/// %c0 = constant 0 : index +/// %0 = dim %arg0, %c0 : tensor +/// %1 = dim %arg1, %c0 : tensor +/// %2 = cmpi "eq", %0, %1 : index +/// %result = scf.if %2 -> (i1) { +/// %c1 = constant 1 : index +/// %true = constant true +/// %4 = scf.for %arg2 = %c0 to %0 step %c1 iter_args(%arg3 = %true) -> (i1) { +/// %5 = extract_element %arg0[%arg2] : tensor +/// %6 = extract_element %arg1[%arg2] : tensor +/// %7 = cmpi "eq", %5, %6 : index +/// %8 = and %arg3, %7 : i1 +/// scf.yield %8 : i1 +/// } +/// scf.yield %4 : i1 +/// } else { +/// %false = constant false +/// scf.yield %false : i1 +/// } +/// +struct ShapeEqOpConverter : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ShapeEqOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; +} // namespace + +LogicalResult +ShapeEqOpConverter::matchAndRewrite(ShapeEqOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering is only defined on `tensor` operands, not + // on shapes. 
+ if (op.lhs().getType().isa() || + op.rhs().getType().isa()) { + return failure(); + } + + ShapeEqOp::Adaptor transformed(operands); + auto loc = op.getLoc(); + Type indexTy = rewriter.getIndexType(); + Value zero = rewriter.create(loc, 0); + Value lhsRank = rewriter.create(loc, indexTy, transformed.lhs(), zero); + Value rhsRank = rewriter.create(loc, indexTy, transformed.rhs(), zero); + Value eqRank = + rewriter.create(loc, CmpIPredicate::eq, lhsRank, rhsRank); + Type i1Ty = rewriter.getI1Type(); + rewriter.replaceOpWithNewOp( + op, i1Ty, eqRank, + [&](OpBuilder &b, Location loc) { + Value one = b.create(loc, 1); + Value init = b.create(loc, i1Ty, b.getBoolAttr(true)); + auto loop = b.create( + loc, zero, lhsRank, one, ValueRange{init}, + [&](OpBuilder &b, Location nestedLoc, Value iv, ValueRange args) { + Value conj = args[0]; + Value lhsExtent = + b.create(loc, transformed.lhs(), iv); + Value rhsExtent = + b.create(loc, transformed.rhs(), iv); + Value eqExtent = b.create(loc, CmpIPredicate::eq, + lhsExtent, rhsExtent); + Value conjNext = b.create(loc, conj, eqExtent); + b.create(loc, ValueRange({conjNext})); + }); + b.create(loc, loop.getResults()); + }, + [&](OpBuilder &b, Location loc) { + Value result = b.create(loc, i1Ty, b.getBoolAttr(false)); + b.create(loc, result); + }); + return success(); +} + +namespace { +class ShapeOfOpConversion : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ShapeOfOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; +} // namespace + +LogicalResult ShapeOfOpConversion::matchAndRewrite( + ShapeOfOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + + // For now, only error-free types are supported by this lowering. + if (op.getType().isa()) + return failure(); + + // For ranked tensor arguments, lower to `tensor_from_elements`. 
+ ShapeOfOp::Adaptor transformed(operands); + Value tensor = transformed.arg(); + Type tensorTy = tensor.getType(); + if (tensorTy.isa()) { + + // Build values for individual extents. + SmallVector extentValues; + RankedTensorType rankedTensorTy = tensorTy.cast(); + int64_t rank = rankedTensorTy.getRank(); + auto loc = op.getLoc(); + for (int64_t i = 0; i < rank; i++) { + if (rankedTensorTy.isDynamicDim(i)) { + Value extent = rewriter.create(loc, tensor, i); + extentValues.push_back(extent); + } else { + Value extent = + rewriter.create(loc, rankedTensorTy.getDimSize(i)); + extentValues.push_back(extent); + } + } + + // Materialize extent tensor. + Value staticExtentTensor = + rewriter.create(loc, extentValues); + rewriter.replaceOpWithNewOp(op, staticExtentTensor, + op.getType()); + return success(); + } + + // Allocate stack memory. + auto loc = op.getLoc(); + Value rank = rewriter.create(loc, tensor); + Type indexTy = rewriter.getIndexType(); + Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); + Value mem = rewriter.create(loc, memTy, ValueRange{rank}); + + // Copy shape extents to stack-allocated memory. + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + rewriter.create( + loc, zero, rank, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { + Value dim = rewriter.create(loc, tensor, iv); + rewriter.create(loc, dim, mem, ValueRange{iv}); + rewriter.create(loc); + }); + + // Load extents to tensor value. 
+ rewriter.replaceOpWithNewOp(op.getOperation(), mem); + return success(); +} + +namespace { +class ToExtentTensorOpConversion + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ToExtentTensorOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + ToExtentTensorOpAdaptor adaptor(operands); + + if (!adaptor.input().getType().isa()) + return rewriter.notifyMatchFailure(op, "input needs to be a tensor"); + + rewriter.replaceOpWithNewOp(op, adaptor.input(), + op.getType()); + return success(); + } +}; +} // namespace + namespace { /// Conversion pass. class ConvertShapeToStandardPass @@ -252,7 +508,7 @@ void ConvertShapeToStandardPass::runOnOperation() { // Setup target legality. MLIRContext &ctx = getContext(); ConversionTarget target(ctx); - target.addLegalDialect(); + target.addLegalDialect(); target.addLegalOp(); // Setup conversion patterns. @@ -271,11 +527,14 @@ void mlir::populateShapeToStandardConversionPatterns( patterns.insert< AnyOpConversion, BinaryOpConversion, - ConstShapeOpConverter, BinaryOpConversion, + BroadcastOpConverter, + ConstShapeOpConverter, ConstSizeOpConversion, GetExtentOpConverter, RankOpConverter, + ReduceOpConverter, + ShapeEqOpConverter, ShapeOfOpConversion, ToExtentTensorOpConversion>(ctx); // clang-format on diff --git a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir b/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir deleted file mode 100644 index cc384496dff05..0000000000000 --- a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir +++ /dev/null @@ -1,132 +0,0 @@ -// RUN: mlir-opt -convert-shape-to-scf -split-input-file %s | FileCheck %s - -// CHECK-LABEL: @shape_reduce -// CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index -func @shape_reduce(%shape : tensor) -> index { - %init = constant 1 : index - %num_elements = shape.reduce(%shape, %init) : tensor -> index { - ^bb0(%index : index, %extent : index, %acc: index): - %new_acc = muli 
%acc, %extent : index - shape.yield %new_acc : index - } - return %num_elements : index -} -// CHECK-NEXT: %[[INIT:.*]] = constant 1 : index -// CHECK-NEXT: %[[C0:.*]] = constant 0 : index -// CHECK-NEXT: %[[C1:.*]] = constant 1 : index -// CHECK-NEXT: %[[RANK:.*]] = dim %[[SHAPE]], %[[C0]] : tensor -// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) -> (index) -// CHECK-NEXT: %[[EXTENT:.*]] = extract_element %[[SHAPE]][%[[I]]] -// CHECK-NEXT: %[[NEW_ACC:.*]] = muli %[[ACC]], %[[EXTENT]] : index -// CHECK-NEXT: scf.yield %[[NEW_ACC]] : index -// CHECK-NEXT: } -// CHECK-NEXT: return %[[RESULT]] : index - -// ----- - -// Don't lower `shape_of` for result type of `shape.shape`. -// CHECK-LABEL: @shape_of -// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) -func @shape_of(%arg : tensor<*xf32>) { - // CHECK: shape.shape - %shape = shape.shape_of %arg : tensor<*xf32> -> !shape.shape - return -} - -// ----- - -// Lower `shape_of` for unranked tensors. 
-// CHECK-LABEL: @shape_of_unranked -// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) -func @shape_of_unranked(%arg : tensor<*xf32>) { - // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> - // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { - // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> - // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref - // CHECK: } - // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref - %shape = shape.shape_of %arg : tensor<*xf32> -> tensor - return -} - -// ----- - -// CHECK-LABEL: @shape_eq -// CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor) -> i1 -func @shape_eq(%a : tensor, %b : tensor) -> i1 { - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[RANK_A:.*]] = dim %[[A]], %[[C0]] : tensor - // CHECK: %[[RANK_B:.*]] = dim %[[B]], %[[C0]] : tensor - // CHECK: %[[RANK_EQ:.*]] = cmpi "eq", %[[RANK_A]], %[[RANK_B]] - // CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) { - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: %[[INIT:.*]] = constant true - // CHECK: %[[SHAPE_EQ_INNER:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK_A]] step %[[C1]] iter_args(%[[CONJ:.*]] = %[[INIT]]) -> (i1) { - // CHECK: %[[EXTENT_A:.*]] = extract_element %[[A]][%[[I]]] : tensor - // CHECK: %[[EXTENT_B:.*]] = extract_element %[[B]][%[[I]]] : tensor - // CHECK: %[[EXTENT_EQ:.*]] = cmpi "eq", %[[EXTENT_A]], %[[EXTENT_B]] - // CHECK: %[[CONJ_NEXT:.*]] = and %[[CONJ]], %[[EXTENT_EQ]] - // CHECK: scf.yield %[[CONJ_NEXT]] : i1 - // CHECK: } - // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 - // CHECK: } else { - // CHECK: %[[SHAPE_EQ_INNER:.*]] = constant false - // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 - // CHECK: } - // CHECK: return %[[SHAPE_EQ]] : i1 - %result = shape.shape_eq %a, %b : tensor, tensor - return %result : i1 -} - -// ----- - -// Don't lower 
`shape.broadcast` if a `shape.shape` type is involved. -// CHECK-LABEL: @broadcast -func @broadcast(%a : tensor, %b : !shape.shape) -> !shape.shape { - // CHECK: shape.broadcast - %c = shape.broadcast %a, %b : tensor, !shape.shape -> !shape.shape - return %c : !shape.shape -} - -// ----- - -// CHECK-LABEL: @broadcast -// CHECK-SAME: (%[[LHS:.*]]: tensor, %[[RHS:.*]]: tensor) -func @broadcast(%a : tensor, %b : tensor) { - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor - // CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor - // CHECK: %[[LHS_SMALLER:.*]] = cmpi "ule", %[[LHS_RANK]], %[[RHS_RANK]] - // CHECK: %[[ARG:.*]]:4 = scf.if %[[LHS_SMALLER]] -> (index, tensor, index, tensor) { - // CHECK: scf.yield %[[LHS_RANK]], %[[LHS]], %[[RHS_RANK]], %[[RHS]] : index, tensor, index, tensor - // CHECK: } else { - // CHECK: scf.yield %[[RHS_RANK]], %[[RHS]], %[[LHS_RANK]], %[[LHS]] : index, tensor, index, tensor - // CHECK: } - // CHECK: %[[MEM:.*]] = alloca(%[[ARG]]#2) : memref - // CHECK: %[[RANK_DIFF:.*]] = subi %[[ARG]]#2, %[[ARG]]#0 : index - // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[RANK_DIFF]] step %[[C1]] { - // CHECK: %[[EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor - // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref - // CHECK: } - // CHECK: scf.for %[[IV:.*]] = %[[RANK_DIFF]] to %[[ARG]]#2 step %[[C1]] { - // CHECK: %[[GREATER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor - // CHECK: %[[GREATER_OPERAND_EXTENT_IS_ONE:.*]] = cmpi "eq", %[[GREATER_OPERAND_EXTENT]], %[[C1]] : index - // CHECK: %[[EXTENT:.*]] = scf.if %[[GREATER_OPERAND_EXTENT_IS_ONE]] -> (index) { - // CHECK: %[[IV_SHIFTED:.*]] = subi %[[IV]], %[[RANK_DIFF]] : index - // CHECK: %[[SMALLER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#1[%[[IV_SHIFTED]]] : tensor - // CHECK: scf.yield %[[SMALLER_OPERAND_EXTENT]] : index - // CHECK: } else { - // CHECK: 
scf.yield %[[GREATER_OPERAND_EXTENT]] : index - // CHECK: } - // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref - // CHECK: } - // CHECK: %[[BROADCASTED:.*]] = tensor_load %[[MEM]] : memref - %0 = shape.broadcast %a, %b - : tensor, tensor -> tensor - return -} - diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index b0fb5bac9071b..bf8e74e5143ed 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -26,46 +26,6 @@ func @binary_ops_on_size(%lhs : !shape.size, %rhs : !shape.size) { // ----- -// Don't lower `shape_of` with `shape.shape` type. -// CHECK-LABEL: @shape_of -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) -func @shape_of_stat(%arg : tensor<1x2x3xf32>) { - // CHECK: shape.shape_of %[[ARG]] : tensor<1x2x3xf32> -> !shape.shape - %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> !shape.shape - return -} - -// ----- - -// Lower `shape_of` for statically shaped tensor. -// CHECK-LABEL: @shape_of_stat -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) -func @shape_of_stat(%arg : tensor<1x2x3xf32>) { - // CHECK-DAG: %[[C1:.*]] = constant 1 : index - // CHECK-DAG: %[[C2:.*]] = constant 2 : index - // CHECK-DAG: %[[C3:.*]] = constant 3 : index - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> - %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor - return -} - -// ----- - -// Lower `shape_of` for dynamically shaped tensor. 
-// CHECK-LABEL: @shape_of_dyn -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x5x?xf32>) -func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { - // CHECK-DAG: %[[C1:.*]] = constant 1 : index - // CHECK-DAG: %[[C5:.*]] = constant 5 : index - // CHECK-DAG: %[[C2:.*]] = constant 2 : index - // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> - %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor - return -} - -// ----- - // Convert `rank` to `dim` of the first dimension. // CHECK-LABEL: @rank // CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index @@ -190,3 +150,174 @@ func @to_extent_tensor(%arg: tensor) -> tensor<3xindex> { // CHECK: return %[[RES]] return %casted : tensor<3xindex> } + +// CHECK-LABEL: @shape_reduce +// CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index +func @shape_reduce(%shape : tensor) -> index { + %init = constant 1 : index + %num_elements = shape.reduce(%shape, %init) : tensor -> index { + ^bb0(%index : index, %extent : index, %acc: index): + %new_acc = muli %acc, %extent : index + shape.yield %new_acc : index + } + return %num_elements : index +} +// CHECK-NEXT: %[[INIT:.*]] = constant 1 : index +// CHECK-NEXT: %[[C0:.*]] = constant 0 : index +// CHECK-NEXT: %[[C1:.*]] = constant 1 : index +// CHECK-NEXT: %[[RANK:.*]] = dim %[[SHAPE]], %[[C0]] : tensor +// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) -> (index) +// CHECK-NEXT: %[[EXTENT:.*]] = extract_element %[[SHAPE]][%[[I]]] +// CHECK-NEXT: %[[NEW_ACC:.*]] = muli %[[ACC]], %[[EXTENT]] : index +// CHECK-NEXT: scf.yield %[[NEW_ACC]] : index +// CHECK-NEXT: } +// CHECK-NEXT: return %[[RESULT]] : index + +// ----- + +// Don't lower `shape_of` for result type of `shape.shape`. 
+// CHECK-LABEL: @shape_of +// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) +func @shape_of(%arg : tensor<*xf32>) { + // CHECK: shape.shape + %shape = shape.shape_of %arg : tensor<*xf32> -> !shape.shape + return +} + +// ----- + +// Lower `shape_of` for unranked tensors. +// CHECK-LABEL: @shape_of_unranked +// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) +func @shape_of_unranked(%arg : tensor<*xf32>) { + // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> + // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { + // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> + // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref + // CHECK: } + // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref + %shape = shape.shape_of %arg : tensor<*xf32> -> tensor + return +} + +// ----- + +// Don't lower `shape_of` with `shape.shape` type. +// CHECK-LABEL: @shape_of +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) +func @shape_of_stat(%arg : tensor<1x2x3xf32>) { + // CHECK: shape.shape_of %[[ARG]] : tensor<1x2x3xf32> -> !shape.shape + %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> !shape.shape + return +} + +// ----- + +// Lower `shape_of` for statically shaped tensor. +// CHECK-LABEL: @shape_of_stat +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) +func @shape_of_stat(%arg : tensor<1x2x3xf32>) { + // CHECK-DAG: %[[C1:.*]] = constant 1 : index + // CHECK-DAG: %[[C2:.*]] = constant 2 : index + // CHECK-DAG: %[[C3:.*]] = constant 3 : index + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> + %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor + return +} + +// ----- + +// Lower `shape_of` for dynamically shaped tensor. 
+// CHECK-LABEL: @shape_of_dyn +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x5x?xf32>) +func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { + // CHECK-DAG: %[[C1:.*]] = constant 1 : index + // CHECK-DAG: %[[C5:.*]] = constant 5 : index + // CHECK-DAG: %[[C2:.*]] = constant 2 : index + // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> + %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor + return +} + +// ----- + +// CHECK-LABEL: @shape_eq +// CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor) -> i1 +func @shape_eq(%a : tensor, %b : tensor) -> i1 { + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[RANK_A:.*]] = dim %[[A]], %[[C0]] : tensor + // CHECK: %[[RANK_B:.*]] = dim %[[B]], %[[C0]] : tensor + // CHECK: %[[RANK_EQ:.*]] = cmpi "eq", %[[RANK_A]], %[[RANK_B]] + // CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) { + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: %[[INIT:.*]] = constant true + // CHECK: %[[SHAPE_EQ_INNER:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK_A]] step %[[C1]] iter_args(%[[CONJ:.*]] = %[[INIT]]) -> (i1) { + // CHECK: %[[EXTENT_A:.*]] = extract_element %[[A]][%[[I]]] : tensor + // CHECK: %[[EXTENT_B:.*]] = extract_element %[[B]][%[[I]]] : tensor + // CHECK: %[[EXTENT_EQ:.*]] = cmpi "eq", %[[EXTENT_A]], %[[EXTENT_B]] + // CHECK: %[[CONJ_NEXT:.*]] = and %[[CONJ]], %[[EXTENT_EQ]] + // CHECK: scf.yield %[[CONJ_NEXT]] : i1 + // CHECK: } + // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 + // CHECK: } else { + // CHECK: %[[SHAPE_EQ_INNER:.*]] = constant false + // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 + // CHECK: } + // CHECK: return %[[SHAPE_EQ]] : i1 + %result = shape.shape_eq %a, %b : tensor, tensor + return %result : i1 +} + +// ----- + +// Don't lower `shape.broadcast` if a `shape.shape` type is involved. 
+// CHECK-LABEL: @broadcast +func @broadcast(%a : tensor, %b : !shape.shape) -> !shape.shape { + // CHECK: shape.broadcast + %c = shape.broadcast %a, %b : tensor, !shape.shape -> !shape.shape + return %c : !shape.shape +} + +// ----- + +// CHECK-LABEL: @broadcast +// CHECK-SAME: (%[[LHS:.*]]: tensor, %[[RHS:.*]]: tensor) +func @broadcast(%a : tensor, %b : tensor) { + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor + // CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor + // CHECK: %[[LHS_SMALLER:.*]] = cmpi "ule", %[[LHS_RANK]], %[[RHS_RANK]] + // CHECK: %[[ARG:.*]]:4 = scf.if %[[LHS_SMALLER]] -> (index, tensor, index, tensor) { + // CHECK: scf.yield %[[LHS_RANK]], %[[LHS]], %[[RHS_RANK]], %[[RHS]] : index, tensor, index, tensor + // CHECK: } else { + // CHECK: scf.yield %[[RHS_RANK]], %[[RHS]], %[[LHS_RANK]], %[[LHS]] : index, tensor, index, tensor + // CHECK: } + // CHECK: %[[MEM:.*]] = alloca(%[[ARG]]#2) : memref + // CHECK: %[[RANK_DIFF:.*]] = subi %[[ARG]]#2, %[[ARG]]#0 : index + // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[RANK_DIFF]] step %[[C1]] { + // CHECK: %[[EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor + // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref + // CHECK: } + // CHECK: scf.for %[[IV:.*]] = %[[RANK_DIFF]] to %[[ARG]]#2 step %[[C1]] { + // CHECK: %[[GREATER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor + // CHECK: %[[GREATER_OPERAND_EXTENT_IS_ONE:.*]] = cmpi "eq", %[[GREATER_OPERAND_EXTENT]], %[[C1]] : index + // CHECK: %[[EXTENT:.*]] = scf.if %[[GREATER_OPERAND_EXTENT_IS_ONE]] -> (index) { + // CHECK: %[[IV_SHIFTED:.*]] = subi %[[IV]], %[[RANK_DIFF]] : index + // CHECK: %[[SMALLER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#1[%[[IV_SHIFTED]]] : tensor + // CHECK: scf.yield %[[SMALLER_OPERAND_EXTENT]] : index + // CHECK: } else { + // CHECK: scf.yield %[[GREATER_OPERAND_EXTENT]] : index + // CHECK: 
} + // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref + // CHECK: } + // CHECK: %[[BROADCASTED:.*]] = tensor_load %[[MEM]] : memref + %0 = shape.broadcast %a, %b + : tensor, tensor -> tensor + return +} + From 1c849ec40a53ca017a668b957fef333e560b0886 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 7 Sep 2020 06:05:57 -0400 Subject: [PATCH 341/465] [MLIR] Fix Win test due to partial order of CHECK directives Differential Revision: https://reviews.llvm.org/D87230 --- mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir index 1a8d1a68a126c..240925baf3d8c 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -99,9 +99,9 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: %[[L3:.*]] = select // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] // - // CHECK-NEXT: {{.*}} = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref - // CHECK-NEXT: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> - // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %25, %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // CHECK-DAG: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref + // CHECK-DAG: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK-NEXT: } // CHECK-NEXT: } @@ -183,7 +183,7 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] 
: i32] : vector<3xf32> - // CHECK: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref + // CHECK-NEXT: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } From 3ca8b9a560a249a18b9f6092b96aa7e8e52db5cf Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 06:53:40 -0400 Subject: [PATCH 342/465] [InstCombine] give a name to an intermediate value for easier tracking; NFC As noted in PR47430, we probably want to conditionally include 'nsw' here anyway, so we are going to need to fill out the optional args. --- .../InstCombine/InstCombineAddSub.cpp | 2 +- llvm/test/Transforms/InstCombine/sub.ll | 25 +++++++++---------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 6812bedf26d1a..5cf6eb2a885a6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1691,7 +1691,7 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, // pointer, subtract it from the offset we have. if (GEP2) { Value *Offset = EmitGEPOffset(GEP2); - Result = Builder.CreateSub(Result, Offset); + Result = Builder.CreateSub(Result, Offset, "gepdiff"); } // If we have p - gep(p, ...) then we have to negate the result. 
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index d2e566be34110..dbe1631226d65 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -503,12 +503,11 @@ define i64 @test24b(i8* %P, i64 %A){ ret i64 %G } - define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( ; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i64 [[A:%.*]], 1 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[B_IDX_NEG_NEG]], -84 -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i64 [[B_IDX_NEG_NEG]], -84 +; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] ; %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A %C = ptrtoint i16* %B to i64 @@ -522,8 +521,8 @@ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; CHECK-LABEL: @test25_as1( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 ; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i16 [[TMP1]], 1 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i16 [[B_IDX_NEG_NEG]], -84 -; CHECK-NEXT: ret i16 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i16 [[B_IDX_NEG_NEG]], -84 +; CHECK-NEXT: ret i16 [[GEPDIFF_NEG]] ; %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A %C = ptrtoint i16 addrspace(1)* %B to i16 @@ -826,8 +825,8 @@ define i32 @test28commuted(i32 %x, i32 %y, i32 %z) { define i64 @test29(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test29( -; CHECK-NEXT: [[DOTNEG:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] ; %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j @@ -840,8 +839,8 @@ define i64 @test29(i8* %foo, i64 %i, i64 %j) { define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( ; CHECK-NEXT: [[GEP1_IDX_NEG_NEG:%.*]] = shl i64 [[I:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = sub i64 [[GEP1_IDX_NEG_NEG]], 
[[J:%.*]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] ; %bit = bitcast i8* %foo to i32* %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i @@ -855,8 +854,8 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; CHECK-LABEL: @test30_as1( ; CHECK-NEXT: [[GEP1_IDX_NEG_NEG:%.*]] = shl i16 [[I:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = sub i16 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] -; CHECK-NEXT: ret i16 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i16 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] +; CHECK-NEXT: ret i16 [[GEPDIFF_NEG]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i @@ -1237,8 +1236,8 @@ define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test58( ; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 ; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200 -; CHECK-NEXT: [[DOTNEG:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] +; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] ; %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %j From 28aa60aae25b7e46804deae909b29b66c1b41d95 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 07:03:48 -0400 Subject: [PATCH 343/465] [InstCombine] add test with more unreachable insts; NFC Goes with D87149 --- llvm/test/Transforms/InstCombine/assume.ll | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index f8a7bb01ff64f..b55b1c21c0b9c 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ 
b/llvm/test/Transforms/InstCombine/assume.ll @@ -597,6 +597,51 @@ exit: unreachable } +define i32 @unreachable_assumes_and_store(i32 %x, i32 %y, i32* %p) { +; CHECK-LABEL: @unreachable_assumes_and_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[OR]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[IF:%.*]], label [[EXIT:%.*]] +; CHECK: if: +; CHECK-NEXT: [[A:%.*]] = and i32 [[Y]], -2 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[A]], 104 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP3]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[X]], 2 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP4]]) +; CHECK-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[Y]], 42 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP5]]) +; CHECK-NEXT: store i32 [[X]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: unreachable +; +entry: + %cmp0 = icmp sgt i32 %x, 1 + %cmp1 = icmp eq i32 %y, 1 + %or = or i1 %cmp0, %cmp1 + tail call void @llvm.assume(i1 %or) + %cmp2 = icmp eq i32 %x, 1 + br i1 %cmp2, label %if, label %exit + +if: + %a = and i32 %y, -2 + %cmp3 = icmp ne i32 %a, 104 + tail call void @llvm.assume(i1 %cmp3) + br label %exit + +exit: + %cmp4 = icmp eq i32 %x, 2 + tail call void @llvm.assume(i1 %cmp4) + %cmp5 = icmp ugt i32 %y, 42 + tail call void @llvm.assume(i1 %cmp5) + store i32 %x, i32* %p + unreachable +} + declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} From 79ea83e104e368ef0f520f1bfa74c15d91baef93 Mon Sep 17 00:00:00 2001 From: Simon Wallis Date: Mon, 7 Sep 2020 13:21:27 +0100 Subject: [PATCH 344/465] [SelectionDAG] memcpy expansion of const volatile struct ignores const zero In getMemcpyLoadsAndStores(), a memcpy where the source is a zero constant is expanded to a MemOp::Set 
instead of a MemOp::Copy, even when the memcpy is volatile. This is incorrect. The fix is to add a check for volatile, and expand to MemOp::Copy in the volatile case. Reviewed By: chill Differential Revision: https://reviews.llvm.org/D87134 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 ++- .../CodeGen/ARM/memcpy-const-vol-struct.ll | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/ARM/memcpy-const-vol-struct.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d2b3e009c2026..2350248626c71 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6037,7 +6037,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SrcAlign = Alignment; assert(SrcAlign && "SrcAlign must be set"); ConstantDataArraySlice Slice; - bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); + // If marked as volatile, perform a copy even when marked as constant. + bool CopyFromConstant = !isVol && isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? 
~0U : TLI.getMaxStoresPerMemcpy(OptSize); const MemOp Op = isZeroConstant diff --git a/llvm/test/CodeGen/ARM/memcpy-const-vol-struct.ll b/llvm/test/CodeGen/ARM/memcpy-const-vol-struct.ll new file mode 100644 index 0000000000000..74f675876834a --- /dev/null +++ b/llvm/test/CodeGen/ARM/memcpy-const-vol-struct.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=armv7-arm-none-eabi -o - %s | FileCheck %s + +%struct.sMyType = type { i32 } + +@val = hidden constant %struct.sMyType zeroinitializer, align 4 +@v = internal global %struct.sMyType zeroinitializer, align 4 + +define hidden void @InitVal() local_unnamed_addr { +entry: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct.sMyType* @v to i8*), i8* align 4 bitcast (%struct.sMyType* @val to i8*), i32 4, i1 true) +; The last argument is the isvolatile argument. This is a volatile memcpy. +; Test that the memcpy expansion does not optimize away the load. +; CHECK: ldr +; CHECK: str + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg) From a8a91533dd65041ced68ed5b9348b5d023837488 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 13:19:00 +0100 Subject: [PATCH 345/465] [X86] Replace EmitX86AddSubSatExpr with EmitX86BinaryIntrinsic generic helper. NFCI. Feed the Intrinsic::ID value directly instead of via the IsSigned/IsAddition bool flags. --- clang/lib/CodeGen/CGBuiltin.cpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1192fbdc1c9d8..42fab29ab8aae 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -11543,13 +11543,9 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } -// Emit addition or subtraction with signed/unsigned saturation. 
-static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, - ArrayRef Ops, bool IsSigned, - bool IsAddition) { - Intrinsic::ID IID = - IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) - : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat); +// Emit binary intrinsic with the same type used in result/args. +static Value *EmitX86BinaryIntrinsic(CodeGenFunction &CGF, + ArrayRef Ops, Intrinsic::ID IID) { llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType()); return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]}); } @@ -14033,28 +14029,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_paddsw256: case X86::BI__builtin_ia32_paddsb128: case X86::BI__builtin_ia32_paddsw128: - return EmitX86AddSubSatExpr(*this, Ops, true, true); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::sadd_sat); case X86::BI__builtin_ia32_paddusb512: case X86::BI__builtin_ia32_paddusw512: case X86::BI__builtin_ia32_paddusb256: case X86::BI__builtin_ia32_paddusw256: case X86::BI__builtin_ia32_paddusb128: case X86::BI__builtin_ia32_paddusw128: - return EmitX86AddSubSatExpr(*this, Ops, false, true); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::uadd_sat); case X86::BI__builtin_ia32_psubsb512: case X86::BI__builtin_ia32_psubsw512: case X86::BI__builtin_ia32_psubsb256: case X86::BI__builtin_ia32_psubsw256: case X86::BI__builtin_ia32_psubsb128: case X86::BI__builtin_ia32_psubsw128: - return EmitX86AddSubSatExpr(*this, Ops, true, false); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::ssub_sat); case X86::BI__builtin_ia32_psubusb512: case X86::BI__builtin_ia32_psubusw512: case X86::BI__builtin_ia32_psubusb256: case X86::BI__builtin_ia32_psubusw256: case X86::BI__builtin_ia32_psubusb128: case X86::BI__builtin_ia32_psubusw128: - return EmitX86AddSubSatExpr(*this, Ops, false, false); + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat); } } From 6670f5d1e66563ad482576d9db4b8393539ab53b Mon Sep 17 
00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 13:20:34 +0100 Subject: [PATCH 346/465] MachineStableHash.h - remove MachineInstr.h include. NFC. Use forward declarations and move the include to MachineStableHash.cpp --- llvm/include/llvm/CodeGen/MachineStableHash.h | 4 +++- llvm/lib/CodeGen/MachineStableHash.cpp | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/MachineStableHash.h b/llvm/include/llvm/CodeGen/MachineStableHash.h index 19bd28a794776..a5e85aef099dc 100644 --- a/llvm/include/llvm/CodeGen/MachineStableHash.h +++ b/llvm/include/llvm/CodeGen/MachineStableHash.h @@ -14,10 +14,12 @@ #ifndef LLVM_CODEGEN_MACHINESTABLEHASH_H #define LLVM_CODEGEN_MACHINESTABLEHASH_H -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/StableHashing.h" namespace llvm { +class MachineInstr; +class MachineOperand; + stable_hash stableHashValue(const MachineOperand &MO); stable_hash stableHashValue(const MachineInstr &MI, bool HashVRegs = false, bool HashConstantPoolIndices = false, diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp index 7106d4172ce0d..fb14f0a33209f 100644 --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" From dbb81881955d641bc873442e75874a5cb160f4ee Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 7 Sep 2020 15:30:38 +0300 Subject: [PATCH 347/465] [llvm-readobj/elf] - Generalize the code for printing dynamic relocations. NFCI. Currently we have 2 large `printDynamicRelocations` methods that have a very similar code for GNU/LLVM styles. 
This patch removes the duplication and renames them to `printDynamicReloc` for consistency. Differential revision: https://reviews.llvm.org/D87087 --- llvm/tools/llvm-readobj/ELFDumper.cpp | 91 +++++++++++---------------- 1 file changed, 37 insertions(+), 54 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index d7312eaf2c93a..9c1b2e3209373 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -774,7 +774,11 @@ template class DumpStyle { virtual void printReloc(const Relocation &R, unsigned RelIndex, const Elf_Shdr &Sec, const Elf_Shdr *SymTab) = 0; virtual void printRelrReloc(const Elf_Relr &R) = 0; + virtual void printDynamicReloc(const Relocation &R) = 0; void printRelocationsHelper(const Elf_Shdr &Sec); + void printDynamicRelocationsHelper(); + virtual void printDynamicRelocHeader(unsigned Type, StringRef Name, + const DynRegionInfo &Reg){}; StringRef getPrintableSectionName(const Elf_Shdr &Sec) const; @@ -894,9 +898,12 @@ template class GNUStyle : public DumpStyle { void printSymbol(const Elf_Sym *Symbol, const Elf_Sym *First, Optional StrTable, bool IsDynamic, bool NonVisibilityBitsUsed) override; + void printDynamicRelocHeader(unsigned Type, StringRef Name, + const DynRegionInfo &Reg) override; + void printDynamicReloc(const Relocation &R) override; + std::string getSymbolSectionNdx(const Elf_Sym *Symbol, const Elf_Sym *FirstSym); - void printDynamicRelocation(const Relocation &R); void printProgramHeaders(); void printSectionMapping(); void printGNUVersionSectionProlog(const typename ELFT::Shdr *Sec, @@ -951,8 +958,7 @@ template class LLVMStyle : public DumpStyle { void printReloc(const Relocation &R, unsigned RelIndex, const Elf_Shdr &Sec, const Elf_Shdr *SymTab) override; void printRelrReloc(const Elf_Relr &R) override; - - void printDynamicRelocation(const Relocation &R); + void printDynamicReloc(const Relocation &R) override; void printSymbols(); void 
printDynamicSymbols(); @@ -3694,10 +3700,9 @@ static void printRelocHeaderFields(formatted_raw_ostream &OS, unsigned SType) { } template -static void printDynamicRelocHeader(const ELFFile &Obj, - formatted_raw_ostream &OS, unsigned Type, - StringRef Name, const DynRegionInfo &Reg) { - uint64_t Offset = Reg.Addr - Obj.base(); +void GNUStyle::printDynamicRelocHeader(unsigned Type, StringRef Name, + const DynRegionInfo &Reg) { + uint64_t Offset = Reg.Addr - this->Obj.base(); OS << "\n'" << Name.str().c_str() << "' relocation section at offset 0x" << to_hexString(Offset, false) << " contains " << Reg.Size << " bytes:\n"; printRelocHeaderFields(OS, Type); @@ -4376,7 +4381,7 @@ RelSymbol getSymbolForReloc(const ELFFile &Obj, StringRef FileName, } // namespace template -void GNUStyle::printDynamicRelocation(const Relocation &R) { +void GNUStyle::printDynamicReloc(const Relocation &R) { printRelRelaReloc( R, getSymbolForReloc(this->Obj, this->FileName, this->dumper(), R)); } @@ -4424,39 +4429,43 @@ template void GNUStyle::printDynamic() { } template void GNUStyle::printDynamicRelocations() { - const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion(); - const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion(); - const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion(); - const DynRegionInfo &DynPLTRelRegion = this->dumper()->getDynPLTRelRegion(); + this->printDynamicRelocationsHelper(); +} + +template void DumpStyle::printDynamicRelocationsHelper() { const bool IsMips64EL = this->Obj.isMips64EL(); + const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion(); if (DynRelaRegion.Size > 0) { - printDynamicRelocHeader(this->Obj, OS, ELF::SHT_RELA, "RELA", - DynRelaRegion); + printDynamicRelocHeader(ELF::SHT_RELA, "RELA", DynRelaRegion); for (const Elf_Rela &Rela : this->dumper()->dyn_relas()) - printDynamicRelocation(Relocation(Rela, IsMips64EL)); + printDynamicReloc(Relocation(Rela, IsMips64EL)); } + + const DynRegionInfo 
&DynRelRegion = this->dumper()->getDynRelRegion(); if (DynRelRegion.Size > 0) { - printDynamicRelocHeader(this->Obj, OS, ELF::SHT_REL, "REL", DynRelRegion); + printDynamicRelocHeader(ELF::SHT_REL, "REL", DynRelRegion); for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) - printDynamicRelocation(Relocation(Rel, IsMips64EL)); + printDynamicReloc(Relocation(Rel, IsMips64EL)); } + + const DynRegionInfo &DynRelrRegion = this->dumper()->getDynRelrRegion(); if (DynRelrRegion.Size > 0) { - printDynamicRelocHeader(this->Obj, OS, ELF::SHT_REL, "RELR", DynRelrRegion); + printDynamicRelocHeader(ELF::SHT_REL, "RELR", DynRelrRegion); Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - for (const Elf_Rel &R : this->Obj.decode_relrs(Relrs)) - printDynamicRelocation(Relocation(R, IsMips64EL)); + for (const Elf_Rel &Rel : Obj.decode_relrs(Relrs)) + printDynamicReloc(Relocation(Rel, IsMips64EL)); } + + const DynRegionInfo &DynPLTRelRegion = this->dumper()->getDynPLTRelRegion(); if (DynPLTRelRegion.Size) { if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) { - printDynamicRelocHeader(this->Obj, OS, ELF::SHT_RELA, "PLT", - DynPLTRelRegion); + printDynamicRelocHeader(ELF::SHT_RELA, "PLT", DynPLTRelRegion); for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Relocation(Rela, IsMips64EL)); + printDynamicReloc(Relocation(Rela, IsMips64EL)); } else { - printDynamicRelocHeader(this->Obj, OS, ELF::SHT_REL, "PLT", - DynPLTRelRegion); + printDynamicRelocHeader(ELF::SHT_REL, "PLT", DynPLTRelRegion); for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Relocation(Rel, IsMips64EL)); + printDynamicReloc(Relocation(Rel, IsMips64EL)); } } } @@ -6344,41 +6353,15 @@ template void LLVMStyle::printDynamic() { } template void LLVMStyle::printDynamicRelocations() { - const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion(); - const DynRegionInfo &DynRelaRegion = this->dumper()->getDynRelaRegion(); - const DynRegionInfo 
&DynRelrRegion = this->dumper()->getDynRelrRegion(); - const DynRegionInfo &DynPLTRelRegion = this->dumper()->getDynPLTRelRegion(); - const bool IsMips64EL = this->Obj.isMips64EL(); - W.startLine() << "Dynamic Relocations {\n"; W.indent(); - if (DynRelaRegion.Size > 0) { - for (const Elf_Rela &Rela : this->dumper()->dyn_relas()) - printDynamicRelocation(Relocation(Rela, IsMips64EL)); - } - if (DynRelRegion.Size > 0) { - for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) - printDynamicRelocation(Relocation(Rel, IsMips64EL)); - } - - if (DynRelrRegion.Size > 0) { - Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - for (const Elf_Rel &Rel : this->Obj.decode_relrs(Relrs)) - printDynamicRelocation(Relocation(Rel, IsMips64EL)); - } - if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) - for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Relocation(Rela, IsMips64EL)); - else - for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) - printDynamicRelocation(Relocation(Rel, IsMips64EL)); - + this->printDynamicRelocationsHelper(); W.unindent(); W.startLine() << "}\n"; } template -void LLVMStyle::printDynamicRelocation(const Relocation &R) { +void LLVMStyle::printDynamicReloc(const Relocation &R) { SmallString<32> RelocName; this->Obj.getRelocationTypeName(R.Type, RelocName); std::string SymbolName = From 973800dc7cbe28a98030293e77afa8ea0343c37d Mon Sep 17 00:00:00 2001 From: David Truby Date: Mon, 7 Sep 2020 13:37:05 +0100 Subject: [PATCH 348/465] Revert "[MLIR][Shape] Merge `shape` to `std`/`scf` lowerings." This reverts commit 15acdd75439b402e993ebe0dbf8eb02e9b88bbdc. 
--- mlir/include/mlir/Conversion/Passes.h | 1 + mlir/include/mlir/Conversion/Passes.td | 12 +- .../mlir/Conversion/ShapeToSCF/ShapeToSCF.h | 27 ++ mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt | 19 + mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp | 337 +++++++++++++++ .../ShapeToStandard/ShapeToStandard.cpp | 391 +++--------------- .../Conversion/ShapeToSCF/shape-to-scf.mlir | 132 ++++++ .../ShapeToStandard/shape-to-standard.mlir | 211 ++-------- 8 files changed, 633 insertions(+), 497 deletions(-) create mode 100644 mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h create mode 100644 mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt create mode 100644 mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp create mode 100644 mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h index b04498598b290..5dd10932981ba 100644 --- a/mlir/include/mlir/Conversion/Passes.h +++ b/mlir/include/mlir/Conversion/Passes.h @@ -23,6 +23,7 @@ #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h" #include "mlir/Conversion/SCFToStandard/SCFToStandard.h" #include "mlir/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVMPass.h" +#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" #include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.h" diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index d4b478dbf4ed0..1b27a7308c7a0 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -239,7 +239,17 @@ def ConvertShapeToStandard : Pass<"convert-shape-to-std", "ModuleOp"> { let summary = "Convert operations from the shape dialect into the standard " "dialect"; let constructor = "mlir::createConvertShapeToStandardPass()"; - let dependentDialects = ["StandardOpsDialect", "scf::SCFDialect"]; + let 
dependentDialects = ["StandardOpsDialect"]; +} + +//===----------------------------------------------------------------------===// +// ShapeToSCF +//===----------------------------------------------------------------------===// + +def ConvertShapeToSCF : FunctionPass<"convert-shape-to-scf"> { + let summary = "Convert operations from the shape dialect to the SCF dialect"; + let constructor = "mlir::createConvertShapeToSCFPass()"; + let dependentDialects = ["scf::SCFDialect"]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h b/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h new file mode 100644 index 0000000000000..f953f6e2ddf10 --- /dev/null +++ b/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h @@ -0,0 +1,27 @@ +//===- ShapeToSCF.h - Conversion utils from Shape to SCF dialect ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ +#define MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ + +#include + +namespace mlir { + +class MLIRContext; +class FunctionPass; +class OwningRewritePatternList; + +void populateShapeToSCFConversionPatterns(OwningRewritePatternList &patterns, + MLIRContext *ctx); + +std::unique_ptr createConvertShapeToSCFPass(); + +} // namespace mlir + +#endif // MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ diff --git a/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt b/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt new file mode 100644 index 0000000000000..60dd2b8514da4 --- /dev/null +++ b/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt @@ -0,0 +1,19 @@ +add_mlir_conversion_library(MLIRShapeToSCF + ShapeToSCF.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/ShapeToSCF + + DEPENDS + MLIRConversionPassIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRIR + MLIRShape + MLIRPass + MLIRSCF + MLIRTransforms + ) diff --git a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp new file mode 100644 index 0000000000000..ae326c5c513e6 --- /dev/null +++ b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp @@ -0,0 +1,337 @@ +//===- ShapeToSCF.cpp - conversion from Shape to SCF dialect --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" + +#include "../PassDetail.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/BlockAndValueMapping.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; +using namespace mlir::shape; +using namespace mlir::scf; + +namespace { +struct BroadcastOpConverter : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(BroadcastOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; +} // namespace + +LogicalResult BroadcastOpConverter::matchAndRewrite( + BroadcastOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering is only defined on `tensor` operands, not + // on shapes. + if (op.getType().isa()) + return failure(); + + assert(!op.lhs().getType().isa() && + !op.rhs().getType().isa()); + auto loc = op.getLoc(); + BroadcastOp::Adaptor transformed(operands); + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + + // Find smaller and greater rank and extent tensor. 
+ Value lhsRank = rewriter.create(loc, transformed.lhs(), zero); + Value rhsRank = rewriter.create(loc, transformed.rhs(), zero); + Value lhsSmaller = + rewriter.create(loc, CmpIPredicate::ule, lhsRank, rhsRank); + Type indexTy = rewriter.getIndexType(); + Type extentTensorTy = op.getType(); + auto ifOp = rewriter.create( + loc, TypeRange{indexTy, extentTensorTy, indexTy, extentTensorTy}, + lhsSmaller, + [&](OpBuilder &b, Location loc) { + b.create(loc, ValueRange{lhsRank, transformed.lhs(), + rhsRank, transformed.rhs()}); + }, + [&](OpBuilder &b, Location loc) { + b.create(loc, ValueRange{rhsRank, transformed.rhs(), + lhsRank, transformed.lhs()}); + }); + Value smallerRank = ifOp.getResult(0); + Value smallerOperand = ifOp.getResult(1); + Value greaterRank = ifOp.getResult(2); + Value greaterOperand = ifOp.getResult(3); + + // Allocate stack memory for the broadcasted extent tensor. + Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); + Value mem = rewriter.create(loc, memTy, ValueRange{greaterRank}); + + // Copy extents from greater operand that are not challenged. + Value rankDiff = + rewriter.create(loc, indexTy, greaterRank, smallerRank); + rewriter.create(loc, zero, rankDiff, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange) { + Value extent = b.create( + loc, greaterOperand, ValueRange{iv}); + b.create(loc, extent, mem, ValueRange{iv}); + b.create(loc); + }); + + // Determine remaining broadcasted extents. 
+ rewriter.create( + loc, rankDiff, greaterRank, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange) { + Value greaterOperandExtent = + b.create(loc, greaterOperand, ValueRange{iv}); + Value greaterOperandExtentIsOne = + b.create(loc, CmpIPredicate::eq, greaterOperandExtent, one); + auto ifOp = b.create( + loc, TypeRange{indexTy}, greaterOperandExtentIsOne, + [&](OpBuilder &b, Location loc) { + Value ivShifted = b.create(loc, indexTy, iv, rankDiff); + Value smallerOperandExtent = b.create( + loc, smallerOperand, ValueRange{ivShifted}); + b.create(loc, smallerOperandExtent); + }, + [&](OpBuilder &b, Location loc) { + b.create(loc, greaterOperandExtent); + }); + Value extent = ifOp.getResult(0); + b.create(loc, extent, mem, ValueRange{iv}); + b.create(loc); + }); + + // Load broadcasted shape as an extent tensor. + rewriter.replaceOpWithNewOp(op, mem); + return success(); +} + +namespace { +/// Converts `shape.shape_eq` to an `scf.for` loop. For now, the lowering is +/// only defined on `tensor` operands. The test for equality first +/// compares their size and, if equal, checks every extent for equality. 
+/// +/// Example: +/// +/// %result = shape.shape_eq %a, %b : tensor, tensor +/// +/// becomes +/// +/// %c0 = constant 0 : index +/// %0 = dim %arg0, %c0 : tensor +/// %1 = dim %arg1, %c0 : tensor +/// %2 = cmpi "eq", %0, %1 : index +/// %result = scf.if %2 -> (i1) { +/// %c1 = constant 1 : index +/// %true = constant true +/// %4 = scf.for %arg2 = %c0 to %0 step %c1 iter_args(%arg3 = %true) -> (i1) { +/// %5 = extract_element %arg0[%arg2] : tensor +/// %6 = extract_element %arg1[%arg2] : tensor +/// %7 = cmpi "eq", %5, %6 : index +/// %8 = and %arg3, %7 : i1 +/// scf.yield %8 : i1 +/// } +/// scf.yield %4 : i1 +/// } else { +/// %false = constant false +/// scf.yield %false : i1 +/// } +/// +struct ShapeEqOpConverter : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ShapeEqOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; +} // namespace + +LogicalResult +ShapeEqOpConverter::matchAndRewrite(ShapeEqOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering is only defined on `tensor` operands, not + // on shapes. 
+ if (op.lhs().getType().isa() || + op.rhs().getType().isa()) { + return failure(); + } + + ShapeEqOp::Adaptor transformed(operands); + auto loc = op.getLoc(); + Type indexTy = rewriter.getIndexType(); + Value zero = rewriter.create(loc, 0); + Value lhsRank = rewriter.create(loc, indexTy, transformed.lhs(), zero); + Value rhsRank = rewriter.create(loc, indexTy, transformed.rhs(), zero); + Value eqRank = + rewriter.create(loc, CmpIPredicate::eq, lhsRank, rhsRank); + Type i1Ty = rewriter.getI1Type(); + rewriter.replaceOpWithNewOp( + op, i1Ty, eqRank, + [&](OpBuilder &b, Location loc) { + Value one = b.create(loc, 1); + Value init = b.create(loc, i1Ty, b.getBoolAttr(true)); + auto loop = b.create( + loc, zero, lhsRank, one, ValueRange{init}, + [&](OpBuilder &b, Location nestedLoc, Value iv, ValueRange args) { + Value conj = args[0]; + Value lhsExtent = + b.create(loc, transformed.lhs(), iv); + Value rhsExtent = + b.create(loc, transformed.rhs(), iv); + Value eqExtent = b.create(loc, CmpIPredicate::eq, + lhsExtent, rhsExtent); + Value conjNext = b.create(loc, conj, eqExtent); + b.create(loc, ValueRange({conjNext})); + }); + b.create(loc, loop.getResults()); + }, + [&](OpBuilder &b, Location loc) { + Value result = b.create(loc, i1Ty, b.getBoolAttr(false)); + b.create(loc, result); + }); + return success(); +} + +namespace { +/// Converts `shape.reduce` to `scf.for`. +struct ReduceOpConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(shape::ReduceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final; +}; +} // namespace + +LogicalResult +ReduceOpConverter::matchAndRewrite(shape::ReduceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering is only defined on `tensor` operands. 
+ if (op.shape().getType().isa()) + return failure(); + + auto loc = op.getLoc(); + shape::ReduceOp::Adaptor transformed(operands); + + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + Type indexTy = rewriter.getIndexType(); + Value rank = rewriter.create(loc, indexTy, transformed.shape(), zero); + + auto loop = rewriter.create( + loc, zero, rank, one, op.initVals(), + [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { + Value extent = b.create(loc, transformed.shape(), iv); + + SmallVector mappedValues{iv, extent}; + mappedValues.append(args.begin(), args.end()); + + BlockAndValueMapping mapping; + Block *reduceBody = op.getBody(); + mapping.map(reduceBody->getArguments(), mappedValues); + for (auto &nested : reduceBody->without_terminator()) + b.clone(nested, mapping); + + SmallVector mappedResults; + for (auto result : reduceBody->getTerminator()->getOperands()) + mappedResults.push_back(mapping.lookup(result)); + b.create(loc, mappedResults); + }); + + rewriter.replaceOp(op, loop.getResults()); + return success(); +} + +namespace { +/// Converts `shape_of` to for loop for unranked tensors. +class ShapeOfOpConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ShapeOfOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; +} // namespace + +LogicalResult +ShapeOfOpConverter::matchAndRewrite(ShapeOfOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering supports only error-free arguments. + if (op.getType().isa()) + return failure(); + + // For ranked tensors `shape_of` lowers to `std` and the pattern can be + // found in the corresponding pass. + ShapeOfOp::Adaptor transformed(operands); + Value arg = transformed.arg(); + Type argTy = arg.getType(); + if (argTy.isa()) + return failure(); + + // Allocate stack memory. 
+ auto loc = op.getLoc(); + Value rank = rewriter.create(loc, arg); + Type indexTy = rewriter.getIndexType(); + Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); + Value mem = rewriter.create(loc, memTy, ValueRange{rank}); + + // Copy shape extents to stack-allocated memory. + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + rewriter.create( + loc, zero, rank, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { + Value dim = rewriter.create(loc, arg, iv); + rewriter.create(loc, dim, mem, ValueRange{iv}); + rewriter.create(loc); + }); + + // Load extents to tensor value. + rewriter.replaceOpWithNewOp(op.getOperation(), mem); + return success(); +} + +namespace { +struct ConvertShapeToSCFPass + : public ConvertShapeToSCFBase { + void runOnFunction() override; +}; +} // namespace + +void ConvertShapeToSCFPass::runOnFunction() { + MLIRContext &ctx = getContext(); + + // Populate conversion patterns. + OwningRewritePatternList patterns; + populateShapeToSCFConversionPatterns(patterns, &ctx); + + // Setup target legality. + ConversionTarget target(getContext()); + target.addLegalDialect(); + + // Apply conversion. 
+ if (failed(applyPartialConversion(getFunction(), target, patterns))) + signalPassFailure(); +} + +void mlir::populateShapeToSCFConversionPatterns( + OwningRewritePatternList &patterns, MLIRContext *ctx) { + // clang-format off + patterns.insert< + BroadcastOpConverter, + ShapeEqOpConverter, + ReduceOpConverter, + ShapeOfOpConverter>(ctx); + // clang-format on +} + +std::unique_ptr mlir::createConvertShapeToSCFPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp index 8c917e08f942c..e92bb83d4f424 100644 --- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp +++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp @@ -12,12 +12,10 @@ #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/Shape/IR/Shape.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/IR/BlockAndValueMapping.h" #include "mlir/Transforms/DialectConversion.h" using namespace mlir; using namespace mlir::shape; -using namespace mlir::scf; /// Conversion patterns. 
namespace { @@ -65,94 +63,67 @@ class BinaryOpConversion : public OpConversionPattern { } // namespace namespace { -struct BroadcastOpConverter : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; +class ConstSizeOpConversion : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ConstSizeOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, op.value().getSExtValue()); + return success(); + } +}; +} // namespace + +namespace { +class ShapeOfOpConversion : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(BroadcastOp op, ArrayRef operands, + matchAndRewrite(ShapeOfOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override; }; } // namespace -LogicalResult BroadcastOpConverter::matchAndRewrite( - BroadcastOp op, ArrayRef operands, +LogicalResult ShapeOfOpConversion::matchAndRewrite( + ShapeOfOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands, not - // on shapes. + + // For now, only error-free types are supported by this lowering. if (op.getType().isa()) return failure(); - assert(!op.lhs().getType().isa() && - !op.rhs().getType().isa()); + // For unranked tensors `shape_of` lowers to `scf` and the pattern can be + // found in the corresponding pass. + ShapeOfOp::Adaptor transformed(operands); + Value tensorVal = transformed.arg(); + Type tensorTy = tensorVal.getType(); + if (tensorTy.isa()) + return failure(); + + // Build values for individual dimensions. 
+ SmallVector dimValues; + RankedTensorType rankedTensorTy = tensorTy.cast(); + int64_t rank = rankedTensorTy.getRank(); auto loc = op.getLoc(); - BroadcastOp::Adaptor transformed(operands); - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - - // Find smaller and greater rank and extent tensor. - Value lhsRank = rewriter.create(loc, transformed.lhs(), zero); - Value rhsRank = rewriter.create(loc, transformed.rhs(), zero); - Value lhsSmaller = - rewriter.create(loc, CmpIPredicate::ule, lhsRank, rhsRank); - Type indexTy = rewriter.getIndexType(); - Type extentTensorTy = op.getType(); - auto ifOp = rewriter.create( - loc, TypeRange{indexTy, extentTensorTy, indexTy, extentTensorTy}, - lhsSmaller, - [&](OpBuilder &b, Location loc) { - b.create(loc, ValueRange{lhsRank, transformed.lhs(), - rhsRank, transformed.rhs()}); - }, - [&](OpBuilder &b, Location loc) { - b.create(loc, ValueRange{rhsRank, transformed.rhs(), - lhsRank, transformed.lhs()}); - }); - Value smallerRank = ifOp.getResult(0); - Value smallerOperand = ifOp.getResult(1); - Value greaterRank = ifOp.getResult(2); - Value greaterOperand = ifOp.getResult(3); - - // Allocate stack memory for the broadcasted extent tensor. - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create(loc, memTy, ValueRange{greaterRank}); - - // Copy extents from greater operand that are not challenged. - Value rankDiff = - rewriter.create(loc, indexTy, greaterRank, smallerRank); - rewriter.create(loc, zero, rankDiff, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange) { - Value extent = b.create( - loc, greaterOperand, ValueRange{iv}); - b.create(loc, extent, mem, ValueRange{iv}); - b.create(loc); - }); - - // Determine remaining broadcasted extents. 
- rewriter.create( - loc, rankDiff, greaterRank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange) { - Value greaterOperandExtent = - b.create(loc, greaterOperand, ValueRange{iv}); - Value greaterOperandExtentIsOne = - b.create(loc, CmpIPredicate::eq, greaterOperandExtent, one); - auto ifOp = b.create( - loc, TypeRange{indexTy}, greaterOperandExtentIsOne, - [&](OpBuilder &b, Location loc) { - Value ivShifted = b.create(loc, indexTy, iv, rankDiff); - Value smallerOperandExtent = b.create( - loc, smallerOperand, ValueRange{ivShifted}); - b.create(loc, smallerOperandExtent); - }, - [&](OpBuilder &b, Location loc) { - b.create(loc, greaterOperandExtent); - }); - Value extent = ifOp.getResult(0); - b.create(loc, extent, mem, ValueRange{iv}); - b.create(loc); - }); - - // Load broadcasted shape as an extent tensor. - rewriter.replaceOpWithNewOp(op, mem); + for (int64_t i = 0; i < rank; i++) { + if (rankedTensorTy.isDynamicDim(i)) { + Value dimVal = rewriter.create(loc, tensorVal, i); + dimValues.push_back(dimVal); + } else { + int64_t dim = rankedTensorTy.getDimSize(i); + Value dimVal = rewriter.create(loc, dim); + dimValues.push_back(dimVal); + } + } + + // Materialize extent tensor. 
+ Value staticExtentTensor = + rewriter.create(loc, dimValues); + rewriter.replaceOpWithNewOp(op, staticExtentTensor, + op.getType()); return success(); } @@ -190,23 +161,26 @@ LogicalResult ConstShapeOpConverter::matchAndRewrite( } namespace { -class ConstSizeOpConversion : public OpConversionPattern { +class ToExtentTensorOpConversion + : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(ConstSizeOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; + matchAndRewrite(ToExtentTensorOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + ToExtentTensorOpAdaptor adaptor(operands); + + if (!adaptor.input().getType().isa()) + return rewriter.notifyMatchFailure(op, "input needs to be a tensor"); + + rewriter.replaceOpWithNewOp(op, adaptor.input(), + op.getType()); + return success(); + } }; } // namespace -LogicalResult ConstSizeOpConversion::matchAndRewrite( - ConstSizeOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - rewriter.replaceOpWithNewOp(op, op.value().getSExtValue()); - return success(); -} - namespace { class GetExtentOpConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -265,236 +239,6 @@ RankOpConverter::matchAndRewrite(shape::RankOp op, ArrayRef operands, return success(); } -namespace { -/// Converts `shape.reduce` to `scf.for`. -struct ReduceOpConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(shape::ReduceOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final; -}; -} // namespace - -LogicalResult -ReduceOpConverter::matchAndRewrite(shape::ReduceOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands. 
- if (op.shape().getType().isa()) - return failure(); - - auto loc = op.getLoc(); - shape::ReduceOp::Adaptor transformed(operands); - - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - Type indexTy = rewriter.getIndexType(); - Value rank = rewriter.create(loc, indexTy, transformed.shape(), zero); - - auto loop = rewriter.create( - loc, zero, rank, one, op.initVals(), - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value extent = b.create(loc, transformed.shape(), iv); - - SmallVector mappedValues{iv, extent}; - mappedValues.append(args.begin(), args.end()); - - BlockAndValueMapping mapping; - Block *reduceBody = op.getBody(); - mapping.map(reduceBody->getArguments(), mappedValues); - for (auto &nested : reduceBody->without_terminator()) - b.clone(nested, mapping); - - SmallVector mappedResults; - for (auto result : reduceBody->getTerminator()->getOperands()) - mappedResults.push_back(mapping.lookup(result)); - b.create(loc, mappedResults); - }); - - rewriter.replaceOp(op, loop.getResults()); - return success(); -} - -namespace { -/// Converts `shape.shape_eq` to an `scf.for` loop. For now, the lowering is -/// only defined on `tensor` operands. The test for equality first -/// compares their size and, if equal, checks every extent for equality. 
-/// -/// Example: -/// -/// %result = shape.shape_eq %a, %b : tensor, tensor -/// -/// becomes -/// -/// %c0 = constant 0 : index -/// %0 = dim %arg0, %c0 : tensor -/// %1 = dim %arg1, %c0 : tensor -/// %2 = cmpi "eq", %0, %1 : index -/// %result = scf.if %2 -> (i1) { -/// %c1 = constant 1 : index -/// %true = constant true -/// %4 = scf.for %arg2 = %c0 to %0 step %c1 iter_args(%arg3 = %true) -> (i1) { -/// %5 = extract_element %arg0[%arg2] : tensor -/// %6 = extract_element %arg1[%arg2] : tensor -/// %7 = cmpi "eq", %5, %6 : index -/// %8 = and %arg3, %7 : i1 -/// scf.yield %8 : i1 -/// } -/// scf.yield %4 : i1 -/// } else { -/// %false = constant false -/// scf.yield %false : i1 -/// } -/// -struct ShapeEqOpConverter : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ShapeEqOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult -ShapeEqOpConverter::matchAndRewrite(ShapeEqOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands, not - // on shapes. 
- if (op.lhs().getType().isa() || - op.rhs().getType().isa()) { - return failure(); - } - - ShapeEqOp::Adaptor transformed(operands); - auto loc = op.getLoc(); - Type indexTy = rewriter.getIndexType(); - Value zero = rewriter.create(loc, 0); - Value lhsRank = rewriter.create(loc, indexTy, transformed.lhs(), zero); - Value rhsRank = rewriter.create(loc, indexTy, transformed.rhs(), zero); - Value eqRank = - rewriter.create(loc, CmpIPredicate::eq, lhsRank, rhsRank); - Type i1Ty = rewriter.getI1Type(); - rewriter.replaceOpWithNewOp( - op, i1Ty, eqRank, - [&](OpBuilder &b, Location loc) { - Value one = b.create(loc, 1); - Value init = b.create(loc, i1Ty, b.getBoolAttr(true)); - auto loop = b.create( - loc, zero, lhsRank, one, ValueRange{init}, - [&](OpBuilder &b, Location nestedLoc, Value iv, ValueRange args) { - Value conj = args[0]; - Value lhsExtent = - b.create(loc, transformed.lhs(), iv); - Value rhsExtent = - b.create(loc, transformed.rhs(), iv); - Value eqExtent = b.create(loc, CmpIPredicate::eq, - lhsExtent, rhsExtent); - Value conjNext = b.create(loc, conj, eqExtent); - b.create(loc, ValueRange({conjNext})); - }); - b.create(loc, loop.getResults()); - }, - [&](OpBuilder &b, Location loc) { - Value result = b.create(loc, i1Ty, b.getBoolAttr(false)); - b.create(loc, result); - }); - return success(); -} - -namespace { -class ShapeOfOpConversion : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ShapeOfOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult ShapeOfOpConversion::matchAndRewrite( - ShapeOfOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - - // For now, only error-free types are supported by this lowering. - if (op.getType().isa()) - return failure(); - - // For ranked tensor arguments, lower to `tensor_from_elements`. 
- ShapeOfOp::Adaptor transformed(operands); - Value tensor = transformed.arg(); - Type tensorTy = tensor.getType(); - if (tensorTy.isa()) { - - // Build values for individual extents. - SmallVector extentValues; - RankedTensorType rankedTensorTy = tensorTy.cast(); - int64_t rank = rankedTensorTy.getRank(); - auto loc = op.getLoc(); - for (int64_t i = 0; i < rank; i++) { - if (rankedTensorTy.isDynamicDim(i)) { - Value extent = rewriter.create(loc, tensor, i); - extentValues.push_back(extent); - } else { - Value extent = - rewriter.create(loc, rankedTensorTy.getDimSize(i)); - extentValues.push_back(extent); - } - } - - // Materialize extent tensor. - Value staticExtentTensor = - rewriter.create(loc, extentValues); - rewriter.replaceOpWithNewOp(op, staticExtentTensor, - op.getType()); - return success(); - } - - // Allocate stack memory. - auto loc = op.getLoc(); - Value rank = rewriter.create(loc, tensor); - Type indexTy = rewriter.getIndexType(); - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create(loc, memTy, ValueRange{rank}); - - // Copy shape extents to stack-allocated memory. - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - rewriter.create( - loc, zero, rank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value dim = rewriter.create(loc, tensor, iv); - rewriter.create(loc, dim, mem, ValueRange{iv}); - rewriter.create(loc); - }); - - // Load extents to tensor value. 
- rewriter.replaceOpWithNewOp(op.getOperation(), mem); - return success(); -} - -namespace { -class ToExtentTensorOpConversion - : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ToExtentTensorOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - ToExtentTensorOpAdaptor adaptor(operands); - - if (!adaptor.input().getType().isa()) - return rewriter.notifyMatchFailure(op, "input needs to be a tensor"); - - rewriter.replaceOpWithNewOp(op, adaptor.input(), - op.getType()); - return success(); - } -}; -} // namespace - namespace { /// Conversion pass. class ConvertShapeToStandardPass @@ -508,7 +252,7 @@ void ConvertShapeToStandardPass::runOnOperation() { // Setup target legality. MLIRContext &ctx = getContext(); ConversionTarget target(ctx); - target.addLegalDialect(); + target.addLegalDialect(); target.addLegalOp(); // Setup conversion patterns. @@ -527,14 +271,11 @@ void mlir::populateShapeToStandardConversionPatterns( patterns.insert< AnyOpConversion, BinaryOpConversion, - BinaryOpConversion, - BroadcastOpConverter, ConstShapeOpConverter, + BinaryOpConversion, ConstSizeOpConversion, GetExtentOpConverter, RankOpConverter, - ReduceOpConverter, - ShapeEqOpConverter, ShapeOfOpConversion, ToExtentTensorOpConversion>(ctx); // clang-format on diff --git a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir b/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir new file mode 100644 index 0000000000000..cc384496dff05 --- /dev/null +++ b/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir @@ -0,0 +1,132 @@ +// RUN: mlir-opt -convert-shape-to-scf -split-input-file %s | FileCheck %s + +// CHECK-LABEL: @shape_reduce +// CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index +func @shape_reduce(%shape : tensor) -> index { + %init = constant 1 : index + %num_elements = shape.reduce(%shape, %init) : tensor -> index { + ^bb0(%index : index, %extent : index, %acc: index): + %new_acc = muli %acc, 
%extent : index + shape.yield %new_acc : index + } + return %num_elements : index +} +// CHECK-NEXT: %[[INIT:.*]] = constant 1 : index +// CHECK-NEXT: %[[C0:.*]] = constant 0 : index +// CHECK-NEXT: %[[C1:.*]] = constant 1 : index +// CHECK-NEXT: %[[RANK:.*]] = dim %[[SHAPE]], %[[C0]] : tensor +// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) -> (index) +// CHECK-NEXT: %[[EXTENT:.*]] = extract_element %[[SHAPE]][%[[I]]] +// CHECK-NEXT: %[[NEW_ACC:.*]] = muli %[[ACC]], %[[EXTENT]] : index +// CHECK-NEXT: scf.yield %[[NEW_ACC]] : index +// CHECK-NEXT: } +// CHECK-NEXT: return %[[RESULT]] : index + +// ----- + +// Don't lower `shape_of` for result type of `shape.shape`. +// CHECK-LABEL: @shape_of +// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) +func @shape_of(%arg : tensor<*xf32>) { + // CHECK: shape.shape + %shape = shape.shape_of %arg : tensor<*xf32> -> !shape.shape + return +} + +// ----- + +// Lower `shape_of` for unranked tensors. 
+// CHECK-LABEL: @shape_of_unranked +// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) +func @shape_of_unranked(%arg : tensor<*xf32>) { + // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> + // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { + // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> + // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref + // CHECK: } + // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref + %shape = shape.shape_of %arg : tensor<*xf32> -> tensor + return +} + +// ----- + +// CHECK-LABEL: @shape_eq +// CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor) -> i1 +func @shape_eq(%a : tensor, %b : tensor) -> i1 { + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[RANK_A:.*]] = dim %[[A]], %[[C0]] : tensor + // CHECK: %[[RANK_B:.*]] = dim %[[B]], %[[C0]] : tensor + // CHECK: %[[RANK_EQ:.*]] = cmpi "eq", %[[RANK_A]], %[[RANK_B]] + // CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) { + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: %[[INIT:.*]] = constant true + // CHECK: %[[SHAPE_EQ_INNER:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK_A]] step %[[C1]] iter_args(%[[CONJ:.*]] = %[[INIT]]) -> (i1) { + // CHECK: %[[EXTENT_A:.*]] = extract_element %[[A]][%[[I]]] : tensor + // CHECK: %[[EXTENT_B:.*]] = extract_element %[[B]][%[[I]]] : tensor + // CHECK: %[[EXTENT_EQ:.*]] = cmpi "eq", %[[EXTENT_A]], %[[EXTENT_B]] + // CHECK: %[[CONJ_NEXT:.*]] = and %[[CONJ]], %[[EXTENT_EQ]] + // CHECK: scf.yield %[[CONJ_NEXT]] : i1 + // CHECK: } + // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 + // CHECK: } else { + // CHECK: %[[SHAPE_EQ_INNER:.*]] = constant false + // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 + // CHECK: } + // CHECK: return %[[SHAPE_EQ]] : i1 + %result = shape.shape_eq %a, %b : tensor, tensor + return %result : i1 +} + +// ----- + +// Don't lower 
`shape.broadcast` if a `shape.shape` type is involved. +// CHECK-LABEL: @broadcast +func @broadcast(%a : tensor, %b : !shape.shape) -> !shape.shape { + // CHECK: shape.broadcast + %c = shape.broadcast %a, %b : tensor, !shape.shape -> !shape.shape + return %c : !shape.shape +} + +// ----- + +// CHECK-LABEL: @broadcast +// CHECK-SAME: (%[[LHS:.*]]: tensor, %[[RHS:.*]]: tensor) +func @broadcast(%a : tensor, %b : tensor) { + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor + // CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor + // CHECK: %[[LHS_SMALLER:.*]] = cmpi "ule", %[[LHS_RANK]], %[[RHS_RANK]] + // CHECK: %[[ARG:.*]]:4 = scf.if %[[LHS_SMALLER]] -> (index, tensor, index, tensor) { + // CHECK: scf.yield %[[LHS_RANK]], %[[LHS]], %[[RHS_RANK]], %[[RHS]] : index, tensor, index, tensor + // CHECK: } else { + // CHECK: scf.yield %[[RHS_RANK]], %[[RHS]], %[[LHS_RANK]], %[[LHS]] : index, tensor, index, tensor + // CHECK: } + // CHECK: %[[MEM:.*]] = alloca(%[[ARG]]#2) : memref + // CHECK: %[[RANK_DIFF:.*]] = subi %[[ARG]]#2, %[[ARG]]#0 : index + // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[RANK_DIFF]] step %[[C1]] { + // CHECK: %[[EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor + // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref + // CHECK: } + // CHECK: scf.for %[[IV:.*]] = %[[RANK_DIFF]] to %[[ARG]]#2 step %[[C1]] { + // CHECK: %[[GREATER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor + // CHECK: %[[GREATER_OPERAND_EXTENT_IS_ONE:.*]] = cmpi "eq", %[[GREATER_OPERAND_EXTENT]], %[[C1]] : index + // CHECK: %[[EXTENT:.*]] = scf.if %[[GREATER_OPERAND_EXTENT_IS_ONE]] -> (index) { + // CHECK: %[[IV_SHIFTED:.*]] = subi %[[IV]], %[[RANK_DIFF]] : index + // CHECK: %[[SMALLER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#1[%[[IV_SHIFTED]]] : tensor + // CHECK: scf.yield %[[SMALLER_OPERAND_EXTENT]] : index + // CHECK: } else { + // CHECK: 
scf.yield %[[GREATER_OPERAND_EXTENT]] : index + // CHECK: } + // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref + // CHECK: } + // CHECK: %[[BROADCASTED:.*]] = tensor_load %[[MEM]] : memref + %0 = shape.broadcast %a, %b + : tensor, tensor -> tensor + return +} + diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index bf8e74e5143ed..b0fb5bac9071b 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -26,6 +26,46 @@ func @binary_ops_on_size(%lhs : !shape.size, %rhs : !shape.size) { // ----- +// Don't lower `shape_of` with `shape.shape` type. +// CHECK-LABEL: @shape_of +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) +func @shape_of_stat(%arg : tensor<1x2x3xf32>) { + // CHECK: shape.shape_of %[[ARG]] : tensor<1x2x3xf32> -> !shape.shape + %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> !shape.shape + return +} + +// ----- + +// Lower `shape_of` for statically shaped tensor. +// CHECK-LABEL: @shape_of_stat +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) +func @shape_of_stat(%arg : tensor<1x2x3xf32>) { + // CHECK-DAG: %[[C1:.*]] = constant 1 : index + // CHECK-DAG: %[[C2:.*]] = constant 2 : index + // CHECK-DAG: %[[C3:.*]] = constant 3 : index + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> + %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor + return +} + +// ----- + +// Lower `shape_of` for dynamically shaped tensor. 
+// CHECK-LABEL: @shape_of_dyn +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x5x?xf32>) +func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { + // CHECK-DAG: %[[C1:.*]] = constant 1 : index + // CHECK-DAG: %[[C5:.*]] = constant 5 : index + // CHECK-DAG: %[[C2:.*]] = constant 2 : index + // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> + %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor + return +} + +// ----- + // Convert `rank` to `dim` of the first dimension. // CHECK-LABEL: @rank // CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index @@ -150,174 +190,3 @@ func @to_extent_tensor(%arg: tensor) -> tensor<3xindex> { // CHECK: return %[[RES]] return %casted : tensor<3xindex> } - -// CHECK-LABEL: @shape_reduce -// CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index -func @shape_reduce(%shape : tensor) -> index { - %init = constant 1 : index - %num_elements = shape.reduce(%shape, %init) : tensor -> index { - ^bb0(%index : index, %extent : index, %acc: index): - %new_acc = muli %acc, %extent : index - shape.yield %new_acc : index - } - return %num_elements : index -} -// CHECK-NEXT: %[[INIT:.*]] = constant 1 : index -// CHECK-NEXT: %[[C0:.*]] = constant 0 : index -// CHECK-NEXT: %[[C1:.*]] = constant 1 : index -// CHECK-NEXT: %[[RANK:.*]] = dim %[[SHAPE]], %[[C0]] : tensor -// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) -> (index) -// CHECK-NEXT: %[[EXTENT:.*]] = extract_element %[[SHAPE]][%[[I]]] -// CHECK-NEXT: %[[NEW_ACC:.*]] = muli %[[ACC]], %[[EXTENT]] : index -// CHECK-NEXT: scf.yield %[[NEW_ACC]] : index -// CHECK-NEXT: } -// CHECK-NEXT: return %[[RESULT]] : index - -// ----- - -// Don't lower `shape_of` for result type of `shape.shape`. 
-// CHECK-LABEL: @shape_of -// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) -func @shape_of(%arg : tensor<*xf32>) { - // CHECK: shape.shape - %shape = shape.shape_of %arg : tensor<*xf32> -> !shape.shape - return -} - -// ----- - -// Lower `shape_of` for unranked tensors. -// CHECK-LABEL: @shape_of_unranked -// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) -func @shape_of_unranked(%arg : tensor<*xf32>) { - // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> - // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { - // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> - // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref - // CHECK: } - // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref - %shape = shape.shape_of %arg : tensor<*xf32> -> tensor - return -} - -// ----- - -// Don't lower `shape_of` with `shape.shape` type. -// CHECK-LABEL: @shape_of -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) -func @shape_of_stat(%arg : tensor<1x2x3xf32>) { - // CHECK: shape.shape_of %[[ARG]] : tensor<1x2x3xf32> -> !shape.shape - %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> !shape.shape - return -} - -// ----- - -// Lower `shape_of` for statically shaped tensor. -// CHECK-LABEL: @shape_of_stat -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) -func @shape_of_stat(%arg : tensor<1x2x3xf32>) { - // CHECK-DAG: %[[C1:.*]] = constant 1 : index - // CHECK-DAG: %[[C2:.*]] = constant 2 : index - // CHECK-DAG: %[[C3:.*]] = constant 3 : index - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> - %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor - return -} - -// ----- - -// Lower `shape_of` for dynamically shaped tensor. 
-// CHECK-LABEL: @shape_of_dyn -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x5x?xf32>) -func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { - // CHECK-DAG: %[[C1:.*]] = constant 1 : index - // CHECK-DAG: %[[C5:.*]] = constant 5 : index - // CHECK-DAG: %[[C2:.*]] = constant 2 : index - // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> - %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor - return -} - -// ----- - -// CHECK-LABEL: @shape_eq -// CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor) -> i1 -func @shape_eq(%a : tensor, %b : tensor) -> i1 { - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[RANK_A:.*]] = dim %[[A]], %[[C0]] : tensor - // CHECK: %[[RANK_B:.*]] = dim %[[B]], %[[C0]] : tensor - // CHECK: %[[RANK_EQ:.*]] = cmpi "eq", %[[RANK_A]], %[[RANK_B]] - // CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) { - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: %[[INIT:.*]] = constant true - // CHECK: %[[SHAPE_EQ_INNER:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK_A]] step %[[C1]] iter_args(%[[CONJ:.*]] = %[[INIT]]) -> (i1) { - // CHECK: %[[EXTENT_A:.*]] = extract_element %[[A]][%[[I]]] : tensor - // CHECK: %[[EXTENT_B:.*]] = extract_element %[[B]][%[[I]]] : tensor - // CHECK: %[[EXTENT_EQ:.*]] = cmpi "eq", %[[EXTENT_A]], %[[EXTENT_B]] - // CHECK: %[[CONJ_NEXT:.*]] = and %[[CONJ]], %[[EXTENT_EQ]] - // CHECK: scf.yield %[[CONJ_NEXT]] : i1 - // CHECK: } - // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 - // CHECK: } else { - // CHECK: %[[SHAPE_EQ_INNER:.*]] = constant false - // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 - // CHECK: } - // CHECK: return %[[SHAPE_EQ]] : i1 - %result = shape.shape_eq %a, %b : tensor, tensor - return %result : i1 -} - -// ----- - -// Don't lower `shape.broadcast` if a `shape.shape` type is involved. 
-// CHECK-LABEL: @broadcast -func @broadcast(%a : tensor, %b : !shape.shape) -> !shape.shape { - // CHECK: shape.broadcast - %c = shape.broadcast %a, %b : tensor, !shape.shape -> !shape.shape - return %c : !shape.shape -} - -// ----- - -// CHECK-LABEL: @broadcast -// CHECK-SAME: (%[[LHS:.*]]: tensor, %[[RHS:.*]]: tensor) -func @broadcast(%a : tensor, %b : tensor) { - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor - // CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor - // CHECK: %[[LHS_SMALLER:.*]] = cmpi "ule", %[[LHS_RANK]], %[[RHS_RANK]] - // CHECK: %[[ARG:.*]]:4 = scf.if %[[LHS_SMALLER]] -> (index, tensor, index, tensor) { - // CHECK: scf.yield %[[LHS_RANK]], %[[LHS]], %[[RHS_RANK]], %[[RHS]] : index, tensor, index, tensor - // CHECK: } else { - // CHECK: scf.yield %[[RHS_RANK]], %[[RHS]], %[[LHS_RANK]], %[[LHS]] : index, tensor, index, tensor - // CHECK: } - // CHECK: %[[MEM:.*]] = alloca(%[[ARG]]#2) : memref - // CHECK: %[[RANK_DIFF:.*]] = subi %[[ARG]]#2, %[[ARG]]#0 : index - // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[RANK_DIFF]] step %[[C1]] { - // CHECK: %[[EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor - // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref - // CHECK: } - // CHECK: scf.for %[[IV:.*]] = %[[RANK_DIFF]] to %[[ARG]]#2 step %[[C1]] { - // CHECK: %[[GREATER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor - // CHECK: %[[GREATER_OPERAND_EXTENT_IS_ONE:.*]] = cmpi "eq", %[[GREATER_OPERAND_EXTENT]], %[[C1]] : index - // CHECK: %[[EXTENT:.*]] = scf.if %[[GREATER_OPERAND_EXTENT_IS_ONE]] -> (index) { - // CHECK: %[[IV_SHIFTED:.*]] = subi %[[IV]], %[[RANK_DIFF]] : index - // CHECK: %[[SMALLER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#1[%[[IV_SHIFTED]]] : tensor - // CHECK: scf.yield %[[SMALLER_OPERAND_EXTENT]] : index - // CHECK: } else { - // CHECK: scf.yield %[[GREATER_OPERAND_EXTENT]] : index - // CHECK: 
} - // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref - // CHECK: } - // CHECK: %[[BROADCASTED:.*]] = tensor_load %[[MEM]] : memref - %0 = shape.broadcast %a, %b - : tensor, tensor -> tensor - return -} - From 23f700c785a141355fa6d022552aafc73135bf5d Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 7 Sep 2020 14:50:13 +0200 Subject: [PATCH 349/465] Revert "[clang] Prevent that Decl::dump on a CXXRecordDecl deserialises further declarations." This reverts commit 0478720157f6413fad7595b8eff9c70d2d99b637. This probably doesn't work when forcing deserialising while dumping (which the ASTDumper optionally supports). --- clang/lib/AST/TextNodeDumper.cpp | 6 +- clang/test/AST/ast-dump-lambda.cpp | 32 +++---- clang/test/AST/ast-dump-records.cpp | 22 ++--- clang/unittests/AST/ASTDumpTest.cpp | 140 ---------------------------- clang/unittests/AST/CMakeLists.txt | 1 - 5 files changed, 28 insertions(+), 173 deletions(-) delete mode 100644 clang/unittests/AST/ASTDumpTest.cpp diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 19b7b4c801d55..16c4c3736a4a3 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1960,11 +1960,7 @@ void TextNodeDumper::VisitCXXRecordDecl(const CXXRecordDecl *D) { FLAG(hasTrivialDestructor, trivial); FLAG(hasNonTrivialDestructor, non_trivial); FLAG(hasUserDeclaredDestructor, user_declared); - // Avoid calls to the external source. 
- if (!D->hasExternalVisibleStorage()) { - FLAG(hasConstexprDestructor, constexpr); - } else - OS << " maybe_constexpr"; + FLAG(hasConstexprDestructor, constexpr); FLAG(needsImplicitDestructor, needs_implicit); FLAG(needsOverloadResolutionForDestructor, needs_overload_resolution); if (!D->needsOverloadResolutionForDestructor()) diff --git a/clang/test/AST/ast-dump-lambda.cpp b/clang/test/AST/ast-dump-lambda.cpp index 302b93734459b..37fb62ef9930e 100644 --- a/clang/test/AST/ast-dump-lambda.cpp +++ b/clang/test/AST/ast-dump-lambda.cpp @@ -48,7 +48,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | |-CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | |-MoveAssignment exists simple trivial needs_implicit -// CHECK-NEXT: | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | |-CXXRecordDecl {{.*}} col:10{{( imported)?}} implicit struct V // CHECK-NEXT: | `-CXXMethodDecl {{.*}} line:17:10{{( imported)?}} f 'void ()' // CHECK-NEXT: | `-CompoundStmt {{.*}} @@ -60,7 +60,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | | |-MoveAssignment -// CHECK-NEXT: | | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | | |-CXXMethodDecl {{.*}} col:7{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | | `-FieldDecl {{.*}} col:8{{( imported)?}} implicit 'V *' @@ -75,7 +75,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:7{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | `-FieldDecl {{.*}} col:8{{( imported)?}} implicit 'V' @@ -94,7 +94,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-CXXConversionDecl {{.*}} col:3{{( imported)?}} implicit constexpr operator auto (*)() 'auto (*() const noexcept)()' inline @@ -108,7 +108,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto (int, ...) 
const' inline // CHECK-NEXT: | | | |-ParmVarDecl {{.*}} col:10{{( imported)?}} a 'int' // CHECK-NEXT: | | | `-CompoundStmt {{.*}} @@ -124,7 +124,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | `-FieldDecl {{.*}} col:4{{( imported)?}} implicit 'Ts...' @@ -139,7 +139,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | `-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | `-CompoundStmt {{.*}} @@ -151,7 +151,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | `-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | `-ReturnStmt {{.*}} @@ -167,7 +167,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | `-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | `-CompoundStmt {{.*}} @@ -179,7 +179,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | `-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | `-ReturnStmt {{.*}} @@ -195,7 +195,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | | `-ReturnStmt {{.*}} @@ -224,7 +224,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> auto' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-FieldDecl {{.*}} col:4{{( imported)?}} implicit 'Ts...' @@ -241,7 +241,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} constexpr operator() 'auto () const' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-CXXConversionDecl {{.*}} col:3{{( imported)?}} implicit constexpr operator auto (*)() 'auto (*() const noexcept)()' inline @@ -255,7 +255,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto ()' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-CXXConversionDecl {{.*}} col:3{{( imported)?}} implicit constexpr operator auto (*)() 'auto (*() const noexcept)()' inline @@ -269,7 +269,7 @@ template void test(Ts... 
a) { // CHECK-NEXT: | | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | | |-MoveAssignment -// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const noexcept' inline // CHECK-NEXT: | | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | |-CXXConversionDecl {{.*}} col:3{{( imported)?}} implicit constexpr operator auto (*)() noexcept 'auto (*() const noexcept)() noexcept' inline @@ -283,7 +283,7 @@ template void test(Ts... a) { // CHECK-NEXT: | | |-MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: | | |-MoveAssignment -// CHECK-NEXT: | | `-Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit +// CHECK-NEXT: | | `-Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: | |-CXXMethodDecl {{.*}} col:3{{( imported)?}} operator() 'auto () const -> int' inline // CHECK-NEXT: | | `-CompoundStmt {{.*}} // CHECK-NEXT: | | `-ReturnStmt {{.*}} diff --git a/clang/test/AST/ast-dump-records.cpp b/clang/test/AST/ast-dump-records.cpp index cdaa2ef16eba8..cb7ac83204312 100644 --- a/clang/test/AST/ast-dump-records.cpp +++ b/clang/test/AST/ast-dump-records.cpp @@ -22,7 +22,7 @@ struct A { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: CXXRecordDecl 0x{{[^ 
]*}} col:8 implicit struct A int a; @@ -57,7 +57,7 @@ struct C { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: CXXRecordDecl 0x{{[^ ]*}} col:8 implicit struct C struct { @@ -68,7 +68,7 @@ struct C { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit int a; // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 a 'int' } b; @@ -82,7 +82,7 @@ struct C { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit int c; // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 c 'int' float d; @@ -104,7 +104,7 @@ struct C { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit int e, f; // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 
e 'int' // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:12 f 'int' @@ -126,7 +126,7 @@ struct D { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: CXXRecordDecl 0x{{[^ ]*}} col:8 implicit struct D int a; @@ -151,7 +151,7 @@ union E { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: CXXRecordDecl 0x{{[^ ]*}} col:7 implicit union E int a; @@ -186,7 +186,7 @@ union G { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit // CHECK-NEXT: CXXRecordDecl 0x{{[^ ]*}} col:7 implicit union G struct { @@ -197,7 +197,7 @@ union G { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit int a; // 
CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 a 'int' @@ -214,7 +214,7 @@ union G { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit int c; // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 c 'int' @@ -237,7 +237,7 @@ union G { // CHECK-NEXT: MoveConstructor exists simple trivial needs_implicit // CHECK-NEXT: CopyAssignment simple trivial has_const_param needs_implicit implicit_has_const_param // CHECK-NEXT: MoveAssignment exists simple trivial needs_implicit - // CHECK-NEXT: Destructor simple irrelevant trivial{{( maybe_constexpr)?}} needs_implicit + // CHECK-NEXT: Destructor simple irrelevant trivial needs_implicit int e, f; // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} col:9 e 'int' diff --git a/clang/unittests/AST/ASTDumpTest.cpp b/clang/unittests/AST/ASTDumpTest.cpp deleted file mode 100644 index 45884dfd11d05..0000000000000 --- a/clang/unittests/AST/ASTDumpTest.cpp +++ /dev/null @@ -1,140 +0,0 @@ -//===- unittests/AST/ASTDumpTest.cpp --- Declaration tests ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Tests Decl::dump(). 
-// -//===----------------------------------------------------------------------===// - -#include "clang/AST/ASTContext.h" -#include "clang/AST/DeclCXX.h" -#include "clang/AST/DeclObjC.h" -#include "clang/Basic/Builtins.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/SourceManager.h" -#include "gtest/gtest.h" - -using namespace clang; - -namespace clang { -namespace ast { - -namespace { -/// An ExternalASTSource that asserts if it is queried for information about -/// any declaration. -class TrappingExternalASTSource : public ExternalASTSource { - ~TrappingExternalASTSource() override = default; - bool FindExternalVisibleDeclsByName(const DeclContext *, - DeclarationName) override { - assert(false && "Unexpected call to FindExternalVisibleDeclsByName"); - return true; - } - - void FindExternalLexicalDecls(const DeclContext *, - llvm::function_ref, - SmallVectorImpl &) override { - assert(false && "Unexpected call to FindExternalLexicalDecls"); - } - - void completeVisibleDeclsMap(const DeclContext *) override { - assert(false && "Unexpected call to completeVisibleDeclsMap"); - } - - void CompleteRedeclChain(const Decl *) override { - assert(false && "Unexpected call to CompleteRedeclChain"); - } - - void CompleteType(TagDecl *) override { - assert(false && "Unexpected call to CompleteType(Tag Decl*)"); - } - - void CompleteType(ObjCInterfaceDecl *) override { - assert(false && "Unexpected call to CompleteType(ObjCInterfaceDecl *)"); - } -}; - -/// Tests that Decl::dump doesn't load additional declarations from the -/// ExternalASTSource. 
-class ExternalASTSourceDumpTest : public ::testing::Test { -protected: - ExternalASTSourceDumpTest() - : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()), - Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), - SourceMgr(Diags, FileMgr), Idents(LangOpts, nullptr), - Ctxt(LangOpts, SourceMgr, Idents, Sels, Builtins) { - Ctxt.setExternalSource(new TrappingExternalASTSource()); - } - - FileSystemOptions FileMgrOpts; - FileManager FileMgr; - IntrusiveRefCntPtr DiagID; - DiagnosticsEngine Diags; - SourceManager SourceMgr; - LangOptions LangOpts; - IdentifierTable Idents; - SelectorTable Sels; - Builtin::Context Builtins; - ASTContext Ctxt; -}; -} // unnamed namespace - -/// Set all flags that activate queries to the ExternalASTSource. -static void setExternalStorageFlags(DeclContext *DC) { - DC->setHasExternalLexicalStorage(); - DC->setHasExternalVisibleStorage(); - DC->setMustBuildLookupTable(); -} - -/// Dumps the given Decl. -static void dumpDecl(Decl *D) { - // Try dumping the decl which shouldn't trigger any calls to the - // ExternalASTSource. - - std::string Out; - llvm::raw_string_ostream OS(Out); - D->dump(OS); -} - -TEST_F(ExternalASTSourceDumpTest, DumpObjCInterfaceDecl) { - // Define an Objective-C interface. - ObjCInterfaceDecl *I = ObjCInterfaceDecl::Create( - Ctxt, Ctxt.getTranslationUnitDecl(), SourceLocation(), - &Ctxt.Idents.get("c"), nullptr, nullptr); - Ctxt.getTranslationUnitDecl()->addDecl(I); - - setExternalStorageFlags(I); - dumpDecl(I); -} - -TEST_F(ExternalASTSourceDumpTest, DumpRecordDecl) { - // Define a struct. - RecordDecl *R = RecordDecl::Create( - Ctxt, TagDecl::TagKind::TTK_Class, Ctxt.getTranslationUnitDecl(), - SourceLocation(), SourceLocation(), &Ctxt.Idents.get("c")); - R->startDefinition(); - R->completeDefinition(); - Ctxt.getTranslationUnitDecl()->addDecl(R); - - setExternalStorageFlags(R); - dumpDecl(R); -} - -TEST_F(ExternalASTSourceDumpTest, DumpCXXRecordDecl) { - // Define a class. 
- CXXRecordDecl *R = CXXRecordDecl::Create( - Ctxt, TagDecl::TagKind::TTK_Class, Ctxt.getTranslationUnitDecl(), - SourceLocation(), SourceLocation(), &Ctxt.Idents.get("c")); - R->startDefinition(); - R->completeDefinition(); - Ctxt.getTranslationUnitDecl()->addDecl(R); - - setExternalStorageFlags(R); - dumpDecl(R); -} - -} // end namespace ast -} // end namespace clang diff --git a/clang/unittests/AST/CMakeLists.txt b/clang/unittests/AST/CMakeLists.txt index 9e0a33fd762fd..2d5d0172afedc 100644 --- a/clang/unittests/AST/CMakeLists.txt +++ b/clang/unittests/AST/CMakeLists.txt @@ -6,7 +6,6 @@ set(LLVM_LINK_COMPONENTS add_clang_unittest(ASTTests ASTContextParentMapTest.cpp - ASTDumpTest.cpp ASTImporterFixtures.cpp ASTImporterTest.cpp ASTImporterGenericRedeclTest.cpp From bb73fcfd0708d8f145060afa461d96f98f0e5f27 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 7 Sep 2020 12:51:23 +0000 Subject: [PATCH 350/465] [gn build] Port 23f700c785a --- llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn index fd24f89aa187f..f25ead00165c0 100644 --- a/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn @@ -15,7 +15,6 @@ unittest("ASTTests") { ] sources = [ "ASTContextParentMapTest.cpp", - "ASTDumpTest.cpp", "ASTImporterFixtures.cpp", "ASTImporterGenericRedeclTest.cpp", "ASTImporterODRStrategiesTest.cpp", From 2853ae3c1b8174e3660424ffac45922601f700ee Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 13:44:35 +0100 Subject: [PATCH 351/465] [X86] Update SSE/AVX ABS intrinsics to emit llvm.abs.* (PR46851) We're now getting close to having the necessary analysis/combines etc. for the new generic llvm.abs.* intrinsics. This patch updates the SSE/AVX ABS vector intrinsics to emit the generic equivalents instead of the icmp+sub+select code pattern. 
Differential Revision: https://reviews.llvm.org/D87101 --- clang/lib/CodeGen/CGBuiltin.cpp | 17 +--- clang/test/CodeGen/avx2-builtins.c | 12 +-- clang/test/CodeGen/avx512bw-builtins.c | 32 +++----- clang/test/CodeGen/avx512f-builtins.c | 32 +++----- clang/test/CodeGen/avx512vl-builtins.c | 74 ++++++----------- clang/test/CodeGen/avx512vlbw-builtins.c | 80 ++++++++----------- clang/test/CodeGen/ssse3-builtins.c | 12 +-- .../CodeGen/X86/avx2-intrinsics-fast-isel.ll | 24 +++--- .../CodeGen/X86/ssse3-intrinsics-fast-isel.ll | 24 +++--- 9 files changed, 108 insertions(+), 199 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 42fab29ab8aae..0cb8f8f636f43 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -11314,16 +11314,6 @@ static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, return EmitX86Select(CGF, Ops[2], Res, Ops[1]); } -static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef Ops) { - - llvm::Type *Ty = Ops[0]->getType(); - Value *Zero = llvm::Constant::getNullValue(Ty); - Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); - Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); - Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); - return Res; -} - static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, ArrayRef Ops) { Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); @@ -13300,9 +13290,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pabsb512: case X86::BI__builtin_ia32_pabsw512: case X86::BI__builtin_ia32_pabsd512: - case X86::BI__builtin_ia32_pabsq512: - return EmitX86Abs(*this, Ops); - + case X86::BI__builtin_ia32_pabsq512: { + Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + } case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: 
diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c index 95659895eeaf9..f3de6d1b87474 100644 --- a/clang/test/CodeGen/avx2-builtins.c +++ b/clang/test/CodeGen/avx2-builtins.c @@ -8,25 +8,19 @@ __m256i test_mm256_abs_epi8(__m256i a) { // CHECK-LABEL: test_mm256_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, %{{.*}} - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> %{{.*}}, zeroinitializer - // CHECK: select <32 x i1> [[CMP]], <32 x i8> %{{.*}}, <32 x i8> [[SUB]] + // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false) return _mm256_abs_epi8(a); } __m256i test_mm256_abs_epi16(__m256i a) { // CHECK-LABEL: test_mm256_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, %{{.*}} - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> %{{.*}}, zeroinitializer - // CHECK: select <16 x i1> [[CMP]], <16 x i16> %{{.*}}, <16 x i16> [[SUB]] + // CHECK: [[ABS:%.*]] = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %{{.*}}, i1 false) return _mm256_abs_epi16(a); } __m256i test_mm256_abs_epi32(__m256i a) { // CHECK-LABEL: test_mm256_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, %{{.*}} - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> %{{.*}}, zeroinitializer - // CHECK: select <8 x i1> [[CMP]], <8 x i32> %{{.*}}, <8 x i32> [[SUB]] + // CHECK: [[ABS:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %{{.*}}, i1 false) return _mm256_abs_epi32(a); } diff --git a/clang/test/CodeGen/avx512bw-builtins.c b/clang/test/CodeGen/avx512bw-builtins.c index c08b354d9519b..cc173f1a9cfe6 100644 --- a/clang/test/CodeGen/avx512bw-builtins.c +++ b/clang/test/CodeGen/avx512bw-builtins.c @@ -878,48 +878,36 @@ __m512i test_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) { } __m512i test_mm512_abs_epi8(__m512i __A) { // CHECK-LABEL: @test_mm512_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <64 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[A]], zeroinitializer - // CHECK: 
select <64 x i1> [[CMP]], <64 x i8> [[A]], <64 x i8> [[SUB]] + // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) return _mm512_abs_epi8(__A); } __m512i test_mm512_mask_abs_epi8(__m512i __W, __mmask64 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <64 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[A]], <64 x i8> [[SUB]] - // CHECK: select <64 x i1> %{{.*}}, <64 x i8> [[SEL]], <64 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> [[ABS]], <64 x i8> %{{.*}} return _mm512_mask_abs_epi8(__W,__U,__A); } __m512i test_mm512_maskz_abs_epi8(__mmask64 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <64 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <64 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <64 x i1> [[CMP]], <64 x i8> [[A]], <64 x i8> [[SUB]] - // CHECK: select <64 x i1> %{{.*}}, <64 x i8> [[SEL]], <64 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false) + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> [[ABS]], <64 x i8> %{{.*}} return _mm512_maskz_abs_epi8(__U,__A); } __m512i test_mm512_abs_epi16(__m512i __A) { // CHECK-LABEL: @test_mm512_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <32 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[A]], zeroinitializer - // CHECK: select <32 x i1> [[CMP]], <32 x i16> [[A]], <32 x i16> [[SUB]] + // CHECK: [[ABS:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %{{.*}}, i1 false) return _mm512_abs_epi16(__A); } __m512i test_mm512_mask_abs_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <32 x i16> zeroinitializer, 
[[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[A]], <32 x i16> [[SUB]] - // CHECK: select <32 x i1> %{{.*}}, <32 x i16> [[SEL]], <32 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %{{.*}}, i1 false) + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> [[ABS]], <32 x i16> %{{.*}} return _mm512_mask_abs_epi16(__W,__U,__A); } __m512i test_mm512_maskz_abs_epi16(__mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <32 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[A]], <32 x i16> [[SUB]] - // CHECK: select <32 x i1> %{{.*}}, <32 x i16> [[SEL]], <32 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %{{.*}}, i1 false) + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> [[ABS]], <32 x i16> %{{.*}} return _mm512_maskz_abs_epi16(__U,__A); } __m512i test_mm512_packs_epi32(__m512i __A, __m512i __B) { diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index b01300c424b72..fb5db4c321748 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -10467,44 +10467,36 @@ __m512 test_mm512_set_ps (float __A, float __B, float __C, float __D, __m512i test_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <8 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[A]], <8 x i64> [[SUB]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[SEL]], <8 x i64> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %{{.*}}, i1 false) + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> 
[[ABS]], <8 x i64> %{{.*}} return _mm512_mask_abs_epi64 (__W,__U,__A); } __m512i test_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <8 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[A]], <8 x i64> [[SUB]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[SEL]], <8 x i64> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %{{.*}}, i1 false) + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[ABS]], <8 x i64> %{{.*}} return _mm512_maskz_abs_epi64 (__U,__A); } __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> - // CHECK: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i32> [[ABS]] to <8 x i64> + // CHECK: [[ABS:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[ABS]], <16 x i32> %{{.*}} return _mm512_mask_abs_epi32 (__W,__U,__A); } __m512i test_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> - // CHECK: 
[[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> - // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i32> [[ABS]] to <8 x i64> + // CHECK: [[ABS:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[ABS]], <16 x i32> %{{.*}} return _mm512_maskz_abs_epi32 (__U,__A); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c index 3348e05790bcf..e7965119fb4b9 100644 --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -4537,90 +4537,68 @@ __m256 test_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { } __m128i test_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> - // CHECK: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[ABS]], <4 x i32> %{{.*}} return _mm_mask_abs_epi32(__W,__U,__A); } __m128i test_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] - // CHECK: 
[[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> - // CHECK: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> - // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <4 x i32> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[ABS]], <4 x i32> %{{.*}} return _mm_maskz_abs_epi32(__U,__A); } __m256i test_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 x i64> - // CHECK: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[ABS]], <8 x i32> %{{.*}} return _mm256_mask_abs_epi32(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_abs_epi32 - // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 x i64> - // CHECK: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> - // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> 
%{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <8 x i32> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[ABS]], <8 x i32> %{{.*}} return _mm256_maskz_abs_epi32(__U,__A); } __m128i test_mm_abs_epi64(__m128i __A) { // CHECK-LABEL: @test_mm_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[A]], zeroinitializer - // CHECK: select <2 x i1> [[CMP]], <2 x i64> [[A]], <2 x i64> [[SUB]] - + // CHECK: [[ABS:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %{{.*}}, i1 false) return _mm_abs_epi64(__A); } __m128i test_mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A]], <2 x i64> [[SUB]] - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[SEL]], <2 x i64> %{{.*}} - + // CHECK: [[ABS:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %{{.*}}, i1 false) + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[ABS]], <2 x i64> %{{.*}} return _mm_mask_abs_epi64(__W,__U,__A); } __m128i test_mm_maskz_abs_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <2 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A]], <2 x i64> [[SUB]] - // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[SEL]], <2 x i64> %{{.*}} + // CHECK: [[ABS:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %{{.*}}, i1 false) + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> [[ABS]], <2 x i64> %{{.*}} return _mm_maskz_abs_epi64(__U,__A); } __m256i test_mm256_abs_epi64(__m256i __A) { // CHECK-LABEL: @test_mm256_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <4 x i64> 
zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], zeroinitializer - // CHECK: select <4 x i1> [[CMP]], <4 x i64> [[A]], <4 x i64> [[SUB]] + // CHECK: [[ABS:%.*]] = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %{{.*}}, i1 false) return _mm256_abs_epi64(__A); } __m256i test_mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <4 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[A]], <4 x i64> [[SUB]] - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[SEL]], <4 x i64> %{{.*}} + // CHECK: [[ABS:%.*]] = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %{{.*}}, i1 false) + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[ABS]], <4 x i64> %{{.*}} return _mm256_mask_abs_epi64(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_abs_epi64 - // CHECK: [[SUB:%.*]] = sub <4 x i64> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i64> [[A]], <4 x i64> [[SUB]] - // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[SEL]], <4 x i64> %{{.*}} + // CHECK: [[ABS:%.*]] = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %{{.*}}, i1 false) + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> [[ABS]], <4 x i64> %{{.*}} return _mm256_maskz_abs_epi64(__U,__A); } __m128i test_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { diff --git a/clang/test/CodeGen/avx512vlbw-builtins.c b/clang/test/CodeGen/avx512vlbw-builtins.c index 0cbd8a1a595fb..df2adfdb97be6 100644 --- a/clang/test/CodeGen/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/avx512vlbw-builtins.c @@ -897,89 +897,73 @@ __m256i test_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) { __m128i test_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { // 
CHECK-LABEL: @test_mm_mask_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[SEL]] to [[DSTTY:<2 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i8> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <16 x i8> + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[ABS]], <16 x i8> %{{.*}} return _mm_mask_abs_epi8(__W,__U,__A); } __m128i test_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i8>]] [[SEL]] to [[DSTTY:<2 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i8> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <16 x i8> + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[ABS]], <16 x i8> %{{.*}} return _mm_maskz_abs_epi8(__U,__A); } __m256i test_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], 
<32 x i8> [[A]], <32 x i8> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[SEL]] to [[DSTTY:<4 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <32 x i8> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <32 x i8> + // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[ABS]], <32 x i8> %{{.*}} return _mm256_mask_abs_epi8(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_abs_epi8 - // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<32 x i8>]] [[SEL]] to [[DSTTY:<4 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} + // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <32 x i8> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <32 x i8> + // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[ABS]], <32 x i8> %{{.*}} return _mm256_maskz_abs_epi8(__U,__A); } __m128i test_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[SEL]] to [[DSTTY:<2 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <8 x i1> 
%{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <8 x i16> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <8 x i16> + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[ABS]], <8 x i16> %{{.*}} return _mm_mask_abs_epi16(__W,__U,__A); } __m128i test_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<8 x i16>]] [[SEL]] to [[DSTTY:<2 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <8 x i16> [[ABS]] to <2 x i64> + // CHECK: [[ABS:%.*]] = bitcast <2 x i64> [[TMP]] to <8 x i16> + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[ABS]], <8 x i16> %{{.*}} return _mm_maskz_abs_epi16(__U,__A); } __m256i test_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[SEL]] to [[DSTTY:<4 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i16> [[ABS]] to <4 x i64> + // CHECK: 
[[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <16 x i16> + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[ABS]], <16 x i16> %{{.*}} return _mm256_mask_abs_epi16(__W,__U,__A); } __m256i test_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_abs_epi16 - // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] - // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer - // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] - // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<16 x i16>]] [[SEL]] to [[DSTTY:<4 x i64>]] - // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] - // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} + // CHECK: [[ABS:%.*]] = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %{{.*}}, i1 false) + // CHECK: [[TMP:%.*]] = bitcast <16 x i16> [[ABS]] to <4 x i64> + // CHECK: [[ABS:%.*]] = bitcast <4 x i64> [[TMP]] to <16 x i16> + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[ABS]], <16 x i16> %{{.*}} return _mm256_maskz_abs_epi16(__U,__A); } diff --git a/clang/test/CodeGen/ssse3-builtins.c b/clang/test/CodeGen/ssse3-builtins.c index b89955fdc0880..d72ca9dd5b41c 100644 --- a/clang/test/CodeGen/ssse3-builtins.c +++ b/clang/test/CodeGen/ssse3-builtins.c @@ -7,25 +7,19 @@ __m128i test_mm_abs_epi8(__m128i a) { // CHECK-LABEL: test_mm_abs_epi8 - // CHECK: [[SUB:%.+]] = sub <16 x i8> zeroinitializer, [[A:%.+]] - // CHECK: [[CMP:%.+]] = icmp sgt <16 x i8> [[A]], zeroinitializer - // CHECK: %{{.*}} = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] + // CHECK: [[ABS:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %{{.*}}, i1 false) return _mm_abs_epi8(a); } __m128i test_mm_abs_epi16(__m128i a) { // CHECK-LABEL: test_mm_abs_epi16 - // CHECK: [[SUB:%.+]] = sub <8 x i16> zeroinitializer, [[A:%.+]] - // CHECK: [[CMP:%.+]] = icmp sgt <8 x i16> [[A]], zeroinitializer - // CHECK: %{{.*}} = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> 
[[SUB]] + // CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %{{.*}}, i1 false) return _mm_abs_epi16(a); } __m128i test_mm_abs_epi32(__m128i a) { // CHECK-LABEL: test_mm_abs_epi32 - // CHECK: [[SUB:%.+]] = sub <4 x i32> zeroinitializer, [[A:%.+]] - // CHECK: [[CMP:%.+]] = icmp sgt <4 x i32> [[A]], zeroinitializer - // CHECK: %{{.*}} = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] + // CHECK: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %{{.*}}, i1 false) return _mm_abs_epi32(a); } diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll index f29891e6f8a3a..0fe9d0b0d35c8 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll @@ -10,13 +10,11 @@ define <4 x i64> @test_mm256_abs_epi8(<4 x i64> %a0) { ; CHECK-NEXT: vpabsb %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %arg = bitcast <4 x i64> %a0 to <32 x i8> - %sub = sub <32 x i8> zeroinitializer, %arg - %cmp = icmp sgt <32 x i8> %arg, zeroinitializer - %sel = select <32 x i1> %cmp, <32 x i8> %arg, <32 x i8> %sub - %res = bitcast <32 x i8> %sel to <4 x i64> + %abs = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %arg, i1 false) + %res = bitcast <32 x i8> %abs to <4 x i64> ret <4 x i64> %res } -declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone +declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1) nounwind readnone define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a0) { ; CHECK-LABEL: test_mm256_abs_epi16: @@ -24,13 +22,11 @@ define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a0) { ; CHECK-NEXT: vpabsw %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %arg = bitcast <4 x i64> %a0 to <16 x i16> - %sub = sub <16 x i16> zeroinitializer, %arg - %cmp = icmp sgt <16 x i16> %arg, zeroinitializer - %sel = select <16 x i1> %cmp, <16 x i16> %arg, <16 x i16> %sub - %res = bitcast <16 x i16> %sel to <4 x i64> + %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %arg, i1 false) + 
%res = bitcast <16 x i16> %abs to <4 x i64> ret <4 x i64> %res } -declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone +declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1) nounwind readnone define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a0) { ; CHECK-LABEL: test_mm256_abs_epi32: @@ -38,13 +34,11 @@ define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a0) { ; CHECK-NEXT: vpabsd %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %arg = bitcast <4 x i64> %a0 to <8 x i32> - %sub = sub <8 x i32> zeroinitializer, %arg - %cmp = icmp sgt <8 x i32> %arg, zeroinitializer - %sel = select <8 x i1> %cmp, <8 x i32> %arg, <8 x i32> %sub - %res = bitcast <8 x i32> %sel to <4 x i64> + %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %arg, i1 false) + %res = bitcast <8 x i32> %abs to <4 x i64> ret <4 x i64> %res } -declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone +declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1) nounwind readnone define <4 x i64> @test_mm256_add_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; CHECK-LABEL: test_mm256_add_epi8: diff --git a/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll index b0529640eb1d2..6540313a891eb 100644 --- a/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll @@ -19,13 +19,11 @@ define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) { ; AVX-NEXT: vpabsb %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} %arg = bitcast <2 x i64> %a0 to <16 x i8> - %sub = sub <16 x i8> zeroinitializer, %arg - %cmp = icmp sgt <16 x i8> %arg, zeroinitializer - %sel = select <16 x i1> %cmp, <16 x i8> %arg, <16 x i8> %sub - %res = bitcast <16 x i8> %sel to <2 x i64> + %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %arg, i1 false) + %res = bitcast <16 x i8> %abs to <2 x i64> ret <2 x i64> %res } -declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1) nounwind readnone define 
<2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) { ; SSE-LABEL: test_mm_abs_epi16: @@ -38,13 +36,11 @@ define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) { ; AVX-NEXT: vpabsw %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} %arg = bitcast <2 x i64> %a0 to <8 x i16> - %sub = sub <8 x i16> zeroinitializer, %arg - %cmp = icmp sgt <8 x i16> %arg, zeroinitializer - %sel = select <8 x i1> %cmp, <8 x i16> %arg, <8 x i16> %sub - %res = bitcast <8 x i16> %sel to <2 x i64> + %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %arg, i1 false) + %res = bitcast <8 x i16> %abs to <2 x i64> ret <2 x i64> %res } -declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone +declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) nounwind readnone define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) { ; SSE-LABEL: test_mm_abs_epi32: @@ -57,13 +53,11 @@ define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) { ; AVX-NEXT: vpabsd %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} %arg = bitcast <2 x i64> %a0 to <4 x i32> - %sub = sub <4 x i32> zeroinitializer, %arg - %cmp = icmp sgt <4 x i32> %arg, zeroinitializer - %sel = select <4 x i1> %cmp, <4 x i32> %arg, <4 x i32> %sub - %res = bitcast <4 x i32> %sel to <2 x i64> + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %arg, i1 false) + %res = bitcast <4 x i32> %abs to <2 x i64> ret <2 x i64> %res } -declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) nounwind readnone define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_alignr_epi8: From f6db681a78994dd7eb7da62da73754d1321085b3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 13:53:35 +0100 Subject: [PATCH 352/465] [X86][SSE] Move llvm.x86.ssse3.pabs.*.128 intrinsics to ssse3-intrinsics-x86-upgrade.ll These have been auto upgraded for some time so this is just a tidyup. 
--- .../X86/ssse3-intrinsics-x86-upgrade.ll | 69 +++++++++++++++++++ llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll | 63 ----------------- 2 files changed, 69 insertions(+), 63 deletions(-) create mode 100644 llvm/test/CodeGen/X86/ssse3-intrinsics-x86-upgrade.ll diff --git a/llvm/test/CodeGen/X86/ssse3-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/ssse3-intrinsics-x86-upgrade.ll new file mode 100644 index 0000000000000..8ea56d29b8d06 --- /dev/null +++ b/llvm/test/CodeGen/X86/ssse3-intrinsics-x86-upgrade.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+ssse3 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1 +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+ssse3 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 + +define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { +; SSE-LABEL: test_x86_ssse3_pabs_b_128: +; SSE: ## %bb.0: +; SSE-NEXT: pabsb %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1c,0xc0] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_ssse3_pabs_b_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1c,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_ssse3_pabs_b_128: +; 
AVX512: ## %bb.0: +; AVX512-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { +; SSE-LABEL: test_x86_ssse3_pabs_d_128: +; SSE: ## %bb.0: +; SSE-NEXT: pabsd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1e,0xc0] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_ssse3_pabs_d_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1e,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_ssse3_pabs_d_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { +; SSE-LABEL: test_x86_ssse3_pabs_w_128: +; SSE: ## %bb.0: +; SSE-NEXT: pabsw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1d,0xc0] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_ssse3_pabs_w_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1d,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_ssse3_pabs_w_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x 
i16>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll b/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll index 629a759332a93..ac386abd17806 100644 --- a/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll @@ -6,69 +6,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 -define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { -; SSE-LABEL: test_x86_ssse3_pabs_b_128: -; SSE: ## %bb.0: -; SSE-NEXT: pabsb %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1c,0xc0] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_ssse3_pabs_b_128: -; AVX1: ## %bb.0: -; AVX1-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1c,0xc0] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_ssse3_pabs_b_128: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] - ret <16 x i8> %res -} -declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone - - -define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { -; SSE-LABEL: test_x86_ssse3_pabs_d_128: -; SSE: ## %bb.0: -; SSE-NEXT: pabsd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1e,0xc0] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_ssse3_pabs_d_128: -; AVX1: ## %bb.0: -; AVX1-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1e,0xc0] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_ssse3_pabs_d_128: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone - - -define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { -; SSE-LABEL: test_x86_ssse3_pabs_w_128: -; SSE: ## %bb.0: -; SSE-NEXT: pabsw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1d,0xc0] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_ssse3_pabs_w_128: -; AVX1: ## %bb.0: -; AVX1-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1d,0xc0] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_ssse3_pabs_w_128: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] - ret <8 x i16> %res -} -declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone - - define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: test_x86_ssse3_phadd_d_128: ; SSE: ## %bb.0: From 6b954f1b79605e4139157ce064fe695c86a0f06a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mu=C3=B1oz?= Date: Mon, 7 Sep 2020 16:00:31 +0300 Subject: [PATCH 353/465] [KillTheDoctor/CMake] Add missing keyword PRIVATE in target_link_libraries Add PRIVATE keyword in target_link_libraries to prevent CMake Error on Windows. While trying to compile llvm/clang on Windows, the following CMake error occurred. The reason is a missing PUBLIC/PRIVATE/INTERFACE keyword in target_link_libraries. ` CMake Error at utils/KillTheDoctor/CMakeLists.txt:5 (target_link_libraries): The keyword signature for target_link_libraries has already been used with the target "KillTheDoctor". 
All uses of target_link_libraries with a target must be either all-keyword or all-plain. The uses of the keyword signature are here: * cmake/modules/AddLLVM.cmake:771 (target_link_libraries) ` Reviewed By: tambre Differential Revision: https://reviews.llvm.org/D87203 --- llvm/utils/KillTheDoctor/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/KillTheDoctor/CMakeLists.txt b/llvm/utils/KillTheDoctor/CMakeLists.txt index 72d994fb9953a..53b90b8204249 100644 --- a/llvm/utils/KillTheDoctor/CMakeLists.txt +++ b/llvm/utils/KillTheDoctor/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_utility(KillTheDoctor ) target_link_libraries(KillTheDoctor + PRIVATE LLVMSupport psapi ) From 4368739941eb1336197dde0e92eb23ce79739ac7 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 7 Sep 2020 15:52:51 +0300 Subject: [PATCH 354/465] [llvm-readobj] - Remove code duplication when printing dynamic relocations. NFCI. LLVM style code can be simplified to avoid the duplication of logic related to printing dynamic relocations. 
Differential revision: https://reviews.llvm.org/D87089 --- llvm/tools/llvm-readobj/ELFDumper.cpp | 33 ++++++++++----------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 9c1b2e3209373..df3799c8fbe67 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -960,6 +960,7 @@ template class LLVMStyle : public DumpStyle { void printRelrReloc(const Elf_Relr &R) override; void printDynamicReloc(const Relocation &R) override; + void printRelRelaReloc(const Relocation &R, StringRef SymbolName); void printSymbols(); void printDynamicSymbols(); void printSymbolSection(const Elf_Sym *Symbol, const Elf_Sym *First); @@ -6156,7 +6157,12 @@ void LLVMStyle::printReloc(const Relocation &R, unsigned RelIndex, return; } - std::string TargetName = Target->Name; + printRelRelaReloc(R, Target->Name); +} + +template +void LLVMStyle::printRelRelaReloc(const Relocation &R, + StringRef SymbolName) { SmallString<32> RelocName; this->Obj.getRelocationTypeName(R.Type, RelocName); @@ -6165,12 +6171,12 @@ void LLVMStyle::printReloc(const Relocation &R, unsigned RelIndex, DictScope Group(W, "Relocation"); W.printHex("Offset", R.Offset); W.printNumber("Type", RelocName, R.Type); - W.printNumber("Symbol", !TargetName.empty() ? TargetName : "-", R.Symbol); + W.printNumber("Symbol", !SymbolName.empty() ? SymbolName : "-", R.Symbol); W.printHex("Addend", Addend); } else { raw_ostream &OS = W.startLine(); OS << W.hex(R.Offset) << " " << RelocName << " " - << (!TargetName.empty() ? TargetName : "-") << " " << W.hex(Addend) + << (!SymbolName.empty() ? 
SymbolName : "-") << " " << W.hex(Addend) << "\n"; } } @@ -6362,24 +6368,9 @@ template void LLVMStyle::printDynamicRelocations() { template void LLVMStyle::printDynamicReloc(const Relocation &R) { - SmallString<32> RelocName; - this->Obj.getRelocationTypeName(R.Type, RelocName); - std::string SymbolName = - getSymbolForReloc(this->Obj, this->FileName, this->dumper(), R).Name; - - uintX_t Addend = R.Addend.getValueOr(0); - if (opts::ExpandRelocs) { - DictScope Group(W, "Relocation"); - W.printHex("Offset", R.Offset); - W.printNumber("Type", RelocName, R.Type); - W.printString("Symbol", !SymbolName.empty() ? SymbolName : "-"); - W.printHex("Addend", Addend); - } else { - raw_ostream &OS = W.startLine(); - OS << W.hex(R.Offset) << " " << RelocName << " " - << (!SymbolName.empty() ? SymbolName : "-") << " " << W.hex(Addend) - << "\n"; - } + RelSymbol S = + getSymbolForReloc(this->Obj, this->FileName, this->dumper(), R); + printRelRelaReloc(R, S.Name); } template From a5046f7acece0085cb6f457da7ebca34d267155a Mon Sep 17 00:00:00 2001 From: Esme-Yi Date: Mon, 7 Sep 2020 13:14:00 +0000 Subject: [PATCH 355/465] [NFC][PowerPC] Add tests in constants-i64.ll. 
--- llvm/test/CodeGen/PowerPC/constants-i64.ll | 45 +++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/PowerPC/constants-i64.ll b/llvm/test/CodeGen/PowerPC/constants-i64.ll index fa45dd19c1654..956845f5a5b35 100644 --- a/llvm/test/CodeGen/PowerPC/constants-i64.ll +++ b/llvm/test/CodeGen/PowerPC/constants-i64.ll @@ -80,5 +80,48 @@ entry: ; CHECK: blr } -attributes #0 = { nounwind readnone } +define i64 @cn32_1() #0 { +entry: + ret i64 3900000000 + +; CHECK-LABEL: @cn32_1 +; CHECK: lis [[REG1:[0-9]+]], 232 +; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 30023 +; CHECK: sldi 3, [[REG1]], 8 +; CHECK: blr +} +define i32 @cn32_1_i32() #0 { +entry: + ret i32 -394967296 + +; CHECK-LABEL: @cn32_1_i32 +; CHECK: lis [[REG1:[0-9]+]], 232 +; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 30023 +; CHECK: sldi 3, [[REG1]], 8 +; CHECK: blr +} + +define i64 @cn32_2() #0 { +entry: + ret i64 4294967295 + +; CHECK-LABEL: @cn32_2 +; CHECK: li [[REG1:[0-9]+]], 0 +; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: blr +} + +define i32 @cn32_2_i32() #0 { +entry: + ret i32 -1 + +; CHECK-LABEL: @cn32_2_i32 +; CHECK: li [[REG1:[0-9]+]], 0 +; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: blr +} + +attributes #0 = { nounwind readnone } From 1e1a4a481987f77fe3e6debc015c1d07af249258 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Fri, 4 Sep 2020 10:00:52 +0200 Subject: [PATCH 356/465] [mlir] Take ValueRange instead of ArrayRef in StructuredIndexed This was likely overlooked when ValueRange was first introduced. There is no reason why StructuredIndexed needs specifically an ArrayRef so use ValueRange for better type compatibility with the rest of the APIs. 
Reviewed By: nicolasvasilache, mehdi_amini Differential Revision: https://reviews.llvm.org/D87127 --- mlir/include/mlir/EDSC/Builders.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/EDSC/Builders.h b/mlir/include/mlir/EDSC/Builders.h index 1f21af617e4d2..70c948d99cda8 100644 --- a/mlir/include/mlir/EDSC/Builders.h +++ b/mlir/include/mlir/EDSC/Builders.h @@ -190,7 +190,7 @@ class TemplatedIndexedValue { TemplatedIndexedValue operator()(Value index, Args... indices) { return TemplatedIndexedValue(value, index).append(indices...); } - TemplatedIndexedValue operator()(ArrayRef indices) { + TemplatedIndexedValue operator()(ValueRange indices) { return TemplatedIndexedValue(value, indices); } @@ -319,7 +319,7 @@ class TemplatedIndexedValue { } private: - TemplatedIndexedValue(Value value, ArrayRef indices) + TemplatedIndexedValue(Value value, ValueRange indices) : value(value), indices(indices.begin(), indices.end()) {} TemplatedIndexedValue &append() { return *this; } From 4b530f75199d9b5e7ee1f4e86e1513a83b4b86cb Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 14:16:38 +0100 Subject: [PATCH 357/465] [X86][SSE] Use llvm.abs.* vector intrinsics instead of old (deprecated) SSE/AVX intrinsics for combine tests This also allows us to extend testing to SSE2+ targets --- llvm/test/CodeGen/X86/combine-abs.ll | 160 ++++++++++++++++++++------- 1 file changed, 121 insertions(+), 39 deletions(-) diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll index 95a2b7e392ba5..70d627b6ed0f5 100644 --- a/llvm/test/CodeGen/X86/combine-abs.ll +++ b/llvm/test/CodeGen/X86/combine-abs.ll @@ -1,24 +1,37 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK 
--check-prefix=AVX512 --check-prefix=AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL ; fold (abs c1) -> c2 define <4 x i32> @combine_v4i32_abs_constant() { -; CHECK-LABEL: combine_v4i32_abs_constant: -; CHECK: # %bb.0: -; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,3,2147483648] -; CHECK-NEXT: retq - %1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> ) +; SSE-LABEL: combine_v4i32_abs_constant: +; SSE: # %bb.0: +; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,1,3,2147483648] +; SSE-NEXT: retq +; +; AVX-LABEL: combine_v4i32_abs_constant: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,3,2147483648] +; AVX-NEXT: retq + %1 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> , i1 false) ret <4 x i32> %1 } define <16 x i16> @combine_v16i16_abs_constant() { -; CHECK-LABEL: combine_v16i16_abs_constant: -; CHECK: # %bb.0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,1,3,3,7,7,255,255,4096,4096,32767,32767,32768,32768,0] -; CHECK-NEXT: retq - %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> ) +; SSE-LABEL: combine_v16i16_abs_constant: +; SSE: # %bb.0: +; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,1,1,3,3,7,7,255] +; SSE-NEXT: movaps {{.*#+}} xmm1 = [255,4096,4096,32767,32767,32768,32768,0] +; SSE-NEXT: retq +; +; AVX-LABEL: combine_v16i16_abs_constant: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps 
{{.*#+}} ymm0 = [0,1,1,3,3,7,7,255,255,4096,4096,32767,32767,32768,32768,0] +; AVX-NEXT: retq + %1 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> , i1 false) ret <16 x i16> %1 } @@ -40,11 +53,24 @@ define i32 @combine_i32_abs_abs(i32 %a) { } define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) { -; CHECK-LABEL: combine_v8i16_abs_abs: -; CHECK: # %bb.0: -; CHECK-NEXT: vpabsw %xmm0, %xmm0 -; CHECK-NEXT: retq - %a1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a) +; SSE2-LABEL: combine_v8i16_abs_abs: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psraw $15, %xmm1 +; SSE2-NEXT: paddw %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE42-LABEL: combine_v8i16_abs_abs: +; SSE42: # %bb.0: +; SSE42-NEXT: pabsw %xmm0, %xmm0 +; SSE42-NEXT: retq +; +; AVX-LABEL: combine_v8i16_abs_abs: +; AVX: # %bb.0: +; AVX-NEXT: vpabsw %xmm0, %xmm0 +; AVX-NEXT: retq + %a1 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 false) %s2 = ashr <8 x i16> %a1, %a2 = add <8 x i16> %a1, %s2 %x2 = xor <8 x i16> %a2, %s2 @@ -52,18 +78,63 @@ define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) { } define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) { -; CHECK-LABEL: combine_v32i8_abs_abs: -; CHECK: # %bb.0: -; CHECK-NEXT: vpabsb %ymm0, %ymm0 -; CHECK-NEXT: retq +; SSE2-LABEL: combine_v32i8_abs_abs: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtb %xmm0, %xmm3 +; SSE2-NEXT: paddb %xmm3, %xmm0 +; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; SSE2-NEXT: paddb %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: retq +; +; SSE42-LABEL: combine_v32i8_abs_abs: +; SSE42: # %bb.0: +; SSE42-NEXT: pabsb %xmm0, %xmm0 +; SSE42-NEXT: pabsb %xmm1, %xmm1 +; SSE42-NEXT: retq +; +; AVX-LABEL: combine_v32i8_abs_abs: +; AVX: # %bb.0: +; AVX-NEXT: vpabsb %ymm0, %ymm0 +; AVX-NEXT: retq %n1 = sub <32 x i8> zeroinitializer, %a %b1 = icmp slt <32 x i8> %a, zeroinitializer %a1 = select <32 x i1> 
%b1, <32 x i8> %n1, <32 x i8> %a - %a2 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a1) + %a2 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a1, i1 false) ret <32 x i8> %a2 } define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) { +; SSE2-LABEL: combine_v4i64_abs_abs: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: paddq %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: paddq %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: retq +; +; SSE42-LABEL: combine_v4i64_abs_abs: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa %xmm0, %xmm2 +; SSE42-NEXT: pxor %xmm3, %xmm3 +; SSE42-NEXT: pxor %xmm4, %xmm4 +; SSE42-NEXT: psubq %xmm0, %xmm4 +; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE42-NEXT: psubq %xmm1, %xmm3 +; SSE42-NEXT: movdqa %xmm1, %xmm0 +; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 +; SSE42-NEXT: movapd %xmm2, %xmm0 +; SSE42-NEXT: retq +; ; AVX2-LABEL: combine_v4i64_abs_abs: ; AVX2: # %bb.0: ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -93,31 +164,42 @@ define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) { ; fold (abs x) -> x iff not-negative define <16 x i8> @combine_v16i8_abs_constant(<16 x i8> %a) { -; CHECK-LABEL: combine_v16i8_abs_constant: -; CHECK: # %bb.0: -; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: combine_v16i8_abs_constant: +; SSE: # %bb.0: +; SSE-NEXT: andps {{.*}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: combine_v16i8_abs_constant: +; AVX: # %bb.0: +; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq %1 = insertelement <16 x i8> undef, i8 15, i32 0 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer %3 = and <16 x i8> %a, %2 - %4 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %3) + %4 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %3, i1 
false) ret <16 x i8> %4 } define <8 x i32> @combine_v8i32_abs_pos(<8 x i32> %a) { -; CHECK-LABEL: combine_v8i32_abs_pos: -; CHECK: # %bb.0: -; CHECK-NEXT: vpsrld $1, %ymm0, %ymm0 -; CHECK-NEXT: retq +; SSE-LABEL: combine_v8i32_abs_pos: +; SSE: # %bb.0: +; SSE-NEXT: psrld $1, %xmm0 +; SSE-NEXT: psrld $1, %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: combine_v8i32_abs_pos: +; AVX: # %bb.0: +; AVX-NEXT: vpsrld $1, %ymm0, %ymm0 +; AVX-NEXT: retq %1 = lshr <8 x i32> %a, - %2 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %1) + %2 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %1, i1 false) ret <8 x i32> %2 } -declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone -declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone -declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1) nounwind readnone +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) nounwind readnone +declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) nounwind readnone -declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone -declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone -declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone +declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1) nounwind readnone +declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1) nounwind readnone +declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1) nounwind readnone From 96e0f34be797ab8bb80526367009495a7eb2118f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 15:07:26 +0100 Subject: [PATCH 358/465] [X86] Auto upgrade SSE/AVX PABS intrinsics to generic Intrinsic::abs Minor followup to D87101, we were expanding this to a neg+icmp+select pattern like we were in CGBuiltin --- llvm/lib/IR/AutoUpgrade.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 369dc50895727..5ec3eb85d99d4 100644 --- 
a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1375,16 +1375,12 @@ static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, } static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) { + Type *Ty = CI.getType(); Value *Op0 = CI.getArgOperand(0); - llvm::Type *Ty = Op0->getType(); - Value *Zero = llvm::Constant::getNullValue(Ty); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero); - Value *Neg = Builder.CreateNeg(Op0); - Value *Res = Builder.CreateSelect(Cmp, Op0, Neg); - + Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty); + Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)}); if (CI.getNumArgOperands() == 3) - Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1)); - + Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1)); return Res; } From 288c582fc93956a7464a988a77c9f6d0f79ed65d Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Mon, 7 Sep 2020 14:51:39 +0100 Subject: [PATCH 359/465] Follow up of rG5f1cad4d296a, slightly reduced test case. NFC. 
--- llvm/test/CodeGen/ARM/pr45824.ll | 57 ++++++++++++-------------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/llvm/test/CodeGen/ARM/pr45824.ll b/llvm/test/CodeGen/ARM/pr45824.ll index dda5bc656fcf7..221c764526b44 100644 --- a/llvm/test/CodeGen/ARM/pr45824.ll +++ b/llvm/test/CodeGen/ARM/pr45824.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=armv7-none-linux-eabi < %s | FileCheck %s -define void @vld1x2([8 x i32] %0) { +define void @vld1x2(i8* %v4, i32 %v2) { ; CHECK-LABEL: vld1x2: ; CHECK: @ %bb.0: ; CHECK-NEXT: mov r0, #0 @@ -13,26 +13,21 @@ define void @vld1x2([8 x i32] %0) { ; CHECK-NEXT: @ in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: vst3.8 {d16, d17, d18}, [r0] ; CHECK-NEXT: b .LBB0_1 - %2 = extractvalue [8 x i32] %0, 5 - br label %3 - -3: ; preds = %.loopexit, %1 - %4 = getelementptr inbounds i8, i8* undef, i32 undef br label %.preheader .preheader: ; preds = %.preheader, %3 - %5 = icmp eq i8* %4, undef - br i1 %5, label %.loopexit, label %.preheader + %v5 = icmp eq i8* %v4, undef + br i1 %v5, label %.loopexit, label %.preheader .loopexit: ; preds = %.preheader - %6 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %4) - %7 = getelementptr inbounds i8, i8* %4, i32 %2 - %8 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %7) + %v6 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %v4) + %v7 = getelementptr inbounds i8, i8* %v4, i32 %v2 + %v8 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %v7) tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 1) - br label %3 + br label %.preheader } -define void @vld1x3([8 x i32] %0) { +define void @vld1x3(i8* %v4, i32 %v2) { ; CHECK-LABEL: vld1x3: ; CHECK: @ %bb.0: ; CHECK-NEXT: mov r0, #0 @@ -44,26 +39,21 @@ define void @vld1x3([8 x i32] %0) { ; CHECK-NEXT: @ in Loop: Header=BB1_1 
Depth=1 ; CHECK-NEXT: vst3.8 {d16, d17, d18}, [r0] ; CHECK-NEXT: b .LBB1_1 - %2 = extractvalue [8 x i32] %0, 5 - br label %3 - -3: ; preds = %.loopexit, %1 - %4 = getelementptr inbounds i8, i8* undef, i32 undef br label %.preheader .preheader: ; preds = %.preheader, %3 - %5 = icmp eq i8* %4, undef - br i1 %5, label %.loopexit, label %.preheader + %v5 = icmp eq i8* %v4, undef + br i1 %v5, label %.loopexit, label %.preheader .loopexit: ; preds = %.preheader - %6 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %4) - %7 = getelementptr inbounds i8, i8* %4, i32 %2 - %8 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %7) + %v6 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %v4) + %v7 = getelementptr inbounds i8, i8* %v4, i32 %v2 + %v8 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %v7) tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 1) - br label %3 + br label %.preheader } -define void @vld1x4([8 x i32] %0) { +define void @vld1x4(i8* %v4, i32 %v2) { ; CHECK-LABEL: vld1x4: ; CHECK: @ %bb.0: ; CHECK-NEXT: mov r0, #0 @@ -75,23 +65,18 @@ define void @vld1x4([8 x i32] %0) { ; CHECK-NEXT: @ in Loop: Header=BB2_1 Depth=1 ; CHECK-NEXT: vst3.8 {d16, d17, d18}, [r0] ; CHECK-NEXT: b .LBB2_1 - %2 = extractvalue [8 x i32] %0, 5 - br label %3 - -3: ; preds = %.loopexit, %1 - %4 = getelementptr inbounds i8, i8* undef, i32 undef br label %.preheader .preheader: ; preds = %.preheader, %3 - %5 = icmp eq i8* %4, undef - br i1 %5, label %.loopexit, label %.preheader + %v5 = icmp eq i8* %v4, undef + br i1 %v5, label %.loopexit, label %.preheader .loopexit: ; preds = %.preheader - %6 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %4) - %7 = getelementptr inbounds i8, i8* %4, i32 %2 - %8 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } 
@llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %7) + %v6 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %v4) + %v7 = getelementptr inbounds i8, i8* %v4, i32 %v2 + %v8 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %v7) tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 1) - br label %3 + br label %.preheader } declare void @llvm.arm.neon.vst3.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) From a70f2eb3e39a42a71ba077247f9deafbdf1e8092 Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Mon, 7 Sep 2020 13:58:01 +0000 Subject: [PATCH 360/465] [MLIR][Shape] Merge `shape` to `std`/`scf` lowerings. Merge the two lowering passes because they are not useful by themselves. The new pass lowers to `std` and `scf` is considered an auxiliary dialect. See also https://llvm.discourse.group/t/conversions-with-multiple-target-dialects/1541/12 Differential Revision: https://reviews.llvm.org/D86779 --- mlir/include/mlir/Conversion/Passes.h | 1 - mlir/include/mlir/Conversion/Passes.td | 12 +- .../mlir/Conversion/ShapeToSCF/ShapeToSCF.h | 27 -- mlir/lib/Conversion/CMakeLists.txt | 1 - mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt | 19 - mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp | 337 --------------- .../ShapeToStandard/ShapeToStandard.cpp | 391 +++++++++++++++--- .../Conversion/ShapeToSCF/shape-to-scf.mlir | 132 ------ .../ShapeToStandard/shape-to-standard.mlir | 211 ++++++++-- 9 files changed, 497 insertions(+), 634 deletions(-) delete mode 100644 mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h delete mode 100644 mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt delete mode 100644 mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp delete mode 100644 mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h index 5dd10932981ba..b04498598b290 100644 --- 
a/mlir/include/mlir/Conversion/Passes.h +++ b/mlir/include/mlir/Conversion/Passes.h @@ -23,7 +23,6 @@ #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h" #include "mlir/Conversion/SCFToStandard/SCFToStandard.h" #include "mlir/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVMPass.h" -#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" #include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.h" diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 1b27a7308c7a0..d4b478dbf4ed0 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -239,17 +239,7 @@ def ConvertShapeToStandard : Pass<"convert-shape-to-std", "ModuleOp"> { let summary = "Convert operations from the shape dialect into the standard " "dialect"; let constructor = "mlir::createConvertShapeToStandardPass()"; - let dependentDialects = ["StandardOpsDialect"]; -} - -//===----------------------------------------------------------------------===// -// ShapeToSCF -//===----------------------------------------------------------------------===// - -def ConvertShapeToSCF : FunctionPass<"convert-shape-to-scf"> { - let summary = "Convert operations from the shape dialect to the SCF dialect"; - let constructor = "mlir::createConvertShapeToSCFPass()"; - let dependentDialects = ["scf::SCFDialect"]; + let dependentDialects = ["StandardOpsDialect", "scf::SCFDialect"]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h b/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h deleted file mode 100644 index f953f6e2ddf10..0000000000000 --- a/mlir/include/mlir/Conversion/ShapeToSCF/ShapeToSCF.h +++ /dev/null @@ -1,27 +0,0 @@ -//===- ShapeToSCF.h - Conversion utils from Shape to SCF dialect ----------===// -// -// 
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ -#define MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ - -#include - -namespace mlir { - -class MLIRContext; -class FunctionPass; -class OwningRewritePatternList; - -void populateShapeToSCFConversionPatterns(OwningRewritePatternList &patterns, - MLIRContext *ctx); - -std::unique_ptr createConvertShapeToSCFPass(); - -} // namespace mlir - -#endif // MLIR_CONVERSION_SHAPETOSCF_SHAPETOSCF_H_ diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index c2bb2130569d3..fe2af07b2a6a8 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -12,7 +12,6 @@ add_subdirectory(OpenMPToLLVM) add_subdirectory(SCFToGPU) add_subdirectory(SCFToSPIRV) add_subdirectory(SCFToStandard) -add_subdirectory(ShapeToSCF) add_subdirectory(ShapeToStandard) add_subdirectory(SPIRVToLLVM) add_subdirectory(StandardToLLVM) diff --git a/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt b/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt deleted file mode 100644 index 60dd2b8514da4..0000000000000 --- a/mlir/lib/Conversion/ShapeToSCF/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -add_mlir_conversion_library(MLIRShapeToSCF - ShapeToSCF.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/ShapeToSCF - - DEPENDS - MLIRConversionPassIncGen - - LINK_COMPONENTS - Core - - LINK_LIBS PUBLIC - MLIRIR - MLIRShape - MLIRPass - MLIRSCF - MLIRTransforms - ) diff --git a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp deleted file mode 100644 index ae326c5c513e6..0000000000000 --- a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp +++ /dev/null @@ -1,337 +0,0 @@ -//===- 
ShapeToSCF.cpp - conversion from Shape to SCF dialect --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" - -#include "../PassDetail.h" -#include "mlir/Dialect/SCF/SCF.h" -#include "mlir/Dialect/Shape/IR/Shape.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/IR/BlockAndValueMapping.h" -#include "mlir/Transforms/DialectConversion.h" - -using namespace mlir; -using namespace mlir::shape; -using namespace mlir::scf; - -namespace { -struct BroadcastOpConverter : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(BroadcastOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult BroadcastOpConverter::matchAndRewrite( - BroadcastOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands, not - // on shapes. - if (op.getType().isa()) - return failure(); - - assert(!op.lhs().getType().isa() && - !op.rhs().getType().isa()); - auto loc = op.getLoc(); - BroadcastOp::Adaptor transformed(operands); - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - - // Find smaller and greater rank and extent tensor. 
- Value lhsRank = rewriter.create(loc, transformed.lhs(), zero); - Value rhsRank = rewriter.create(loc, transformed.rhs(), zero); - Value lhsSmaller = - rewriter.create(loc, CmpIPredicate::ule, lhsRank, rhsRank); - Type indexTy = rewriter.getIndexType(); - Type extentTensorTy = op.getType(); - auto ifOp = rewriter.create( - loc, TypeRange{indexTy, extentTensorTy, indexTy, extentTensorTy}, - lhsSmaller, - [&](OpBuilder &b, Location loc) { - b.create(loc, ValueRange{lhsRank, transformed.lhs(), - rhsRank, transformed.rhs()}); - }, - [&](OpBuilder &b, Location loc) { - b.create(loc, ValueRange{rhsRank, transformed.rhs(), - lhsRank, transformed.lhs()}); - }); - Value smallerRank = ifOp.getResult(0); - Value smallerOperand = ifOp.getResult(1); - Value greaterRank = ifOp.getResult(2); - Value greaterOperand = ifOp.getResult(3); - - // Allocate stack memory for the broadcasted extent tensor. - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create(loc, memTy, ValueRange{greaterRank}); - - // Copy extents from greater operand that are not challenged. - Value rankDiff = - rewriter.create(loc, indexTy, greaterRank, smallerRank); - rewriter.create(loc, zero, rankDiff, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange) { - Value extent = b.create( - loc, greaterOperand, ValueRange{iv}); - b.create(loc, extent, mem, ValueRange{iv}); - b.create(loc); - }); - - // Determine remaining broadcasted extents. 
- rewriter.create( - loc, rankDiff, greaterRank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange) { - Value greaterOperandExtent = - b.create(loc, greaterOperand, ValueRange{iv}); - Value greaterOperandExtentIsOne = - b.create(loc, CmpIPredicate::eq, greaterOperandExtent, one); - auto ifOp = b.create( - loc, TypeRange{indexTy}, greaterOperandExtentIsOne, - [&](OpBuilder &b, Location loc) { - Value ivShifted = b.create(loc, indexTy, iv, rankDiff); - Value smallerOperandExtent = b.create( - loc, smallerOperand, ValueRange{ivShifted}); - b.create(loc, smallerOperandExtent); - }, - [&](OpBuilder &b, Location loc) { - b.create(loc, greaterOperandExtent); - }); - Value extent = ifOp.getResult(0); - b.create(loc, extent, mem, ValueRange{iv}); - b.create(loc); - }); - - // Load broadcasted shape as an extent tensor. - rewriter.replaceOpWithNewOp(op, mem); - return success(); -} - -namespace { -/// Converts `shape.shape_eq` to an `scf.for` loop. For now, the lowering is -/// only defined on `tensor` operands. The test for equality first -/// compares their size and, if equal, checks every extent for equality. 
-/// -/// Example: -/// -/// %result = shape.shape_eq %a, %b : tensor, tensor -/// -/// becomes -/// -/// %c0 = constant 0 : index -/// %0 = dim %arg0, %c0 : tensor -/// %1 = dim %arg1, %c0 : tensor -/// %2 = cmpi "eq", %0, %1 : index -/// %result = scf.if %2 -> (i1) { -/// %c1 = constant 1 : index -/// %true = constant true -/// %4 = scf.for %arg2 = %c0 to %0 step %c1 iter_args(%arg3 = %true) -> (i1) { -/// %5 = extract_element %arg0[%arg2] : tensor -/// %6 = extract_element %arg1[%arg2] : tensor -/// %7 = cmpi "eq", %5, %6 : index -/// %8 = and %arg3, %7 : i1 -/// scf.yield %8 : i1 -/// } -/// scf.yield %4 : i1 -/// } else { -/// %false = constant false -/// scf.yield %false : i1 -/// } -/// -struct ShapeEqOpConverter : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ShapeEqOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult -ShapeEqOpConverter::matchAndRewrite(ShapeEqOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands, not - // on shapes. 
- if (op.lhs().getType().isa() || - op.rhs().getType().isa()) { - return failure(); - } - - ShapeEqOp::Adaptor transformed(operands); - auto loc = op.getLoc(); - Type indexTy = rewriter.getIndexType(); - Value zero = rewriter.create(loc, 0); - Value lhsRank = rewriter.create(loc, indexTy, transformed.lhs(), zero); - Value rhsRank = rewriter.create(loc, indexTy, transformed.rhs(), zero); - Value eqRank = - rewriter.create(loc, CmpIPredicate::eq, lhsRank, rhsRank); - Type i1Ty = rewriter.getI1Type(); - rewriter.replaceOpWithNewOp( - op, i1Ty, eqRank, - [&](OpBuilder &b, Location loc) { - Value one = b.create(loc, 1); - Value init = b.create(loc, i1Ty, b.getBoolAttr(true)); - auto loop = b.create( - loc, zero, lhsRank, one, ValueRange{init}, - [&](OpBuilder &b, Location nestedLoc, Value iv, ValueRange args) { - Value conj = args[0]; - Value lhsExtent = - b.create(loc, transformed.lhs(), iv); - Value rhsExtent = - b.create(loc, transformed.rhs(), iv); - Value eqExtent = b.create(loc, CmpIPredicate::eq, - lhsExtent, rhsExtent); - Value conjNext = b.create(loc, conj, eqExtent); - b.create(loc, ValueRange({conjNext})); - }); - b.create(loc, loop.getResults()); - }, - [&](OpBuilder &b, Location loc) { - Value result = b.create(loc, i1Ty, b.getBoolAttr(false)); - b.create(loc, result); - }); - return success(); -} - -namespace { -/// Converts `shape.reduce` to `scf.for`. -struct ReduceOpConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(shape::ReduceOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const final; -}; -} // namespace - -LogicalResult -ReduceOpConverter::matchAndRewrite(shape::ReduceOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering is only defined on `tensor` operands. 
- if (op.shape().getType().isa()) - return failure(); - - auto loc = op.getLoc(); - shape::ReduceOp::Adaptor transformed(operands); - - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - Type indexTy = rewriter.getIndexType(); - Value rank = rewriter.create(loc, indexTy, transformed.shape(), zero); - - auto loop = rewriter.create( - loc, zero, rank, one, op.initVals(), - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value extent = b.create(loc, transformed.shape(), iv); - - SmallVector mappedValues{iv, extent}; - mappedValues.append(args.begin(), args.end()); - - BlockAndValueMapping mapping; - Block *reduceBody = op.getBody(); - mapping.map(reduceBody->getArguments(), mappedValues); - for (auto &nested : reduceBody->without_terminator()) - b.clone(nested, mapping); - - SmallVector mappedResults; - for (auto result : reduceBody->getTerminator()->getOperands()) - mappedResults.push_back(mapping.lookup(result)); - b.create(loc, mappedResults); - }); - - rewriter.replaceOp(op, loop.getResults()); - return success(); -} - -namespace { -/// Converts `shape_of` to for loop for unranked tensors. -class ShapeOfOpConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ShapeOfOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override; -}; -} // namespace - -LogicalResult -ShapeOfOpConverter::matchAndRewrite(ShapeOfOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - // For now, this lowering supports only error-free arguments. - if (op.getType().isa()) - return failure(); - - // For ranked tensors `shape_of` lowers to `std` and the pattern can be - // found in the corresponding pass. - ShapeOfOp::Adaptor transformed(operands); - Value arg = transformed.arg(); - Type argTy = arg.getType(); - if (argTy.isa()) - return failure(); - - // Allocate stack memory. 
- auto loc = op.getLoc(); - Value rank = rewriter.create(loc, arg); - Type indexTy = rewriter.getIndexType(); - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create(loc, memTy, ValueRange{rank}); - - // Copy shape extents to stack-allocated memory. - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - rewriter.create( - loc, zero, rank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value dim = rewriter.create(loc, arg, iv); - rewriter.create(loc, dim, mem, ValueRange{iv}); - rewriter.create(loc); - }); - - // Load extents to tensor value. - rewriter.replaceOpWithNewOp(op.getOperation(), mem); - return success(); -} - -namespace { -struct ConvertShapeToSCFPass - : public ConvertShapeToSCFBase { - void runOnFunction() override; -}; -} // namespace - -void ConvertShapeToSCFPass::runOnFunction() { - MLIRContext &ctx = getContext(); - - // Populate conversion patterns. - OwningRewritePatternList patterns; - populateShapeToSCFConversionPatterns(patterns, &ctx); - - // Setup target legality. - ConversionTarget target(getContext()); - target.addLegalDialect(); - - // Apply conversion. 
- if (failed(applyPartialConversion(getFunction(), target, patterns))) - signalPassFailure(); -} - -void mlir::populateShapeToSCFConversionPatterns( - OwningRewritePatternList &patterns, MLIRContext *ctx) { - // clang-format off - patterns.insert< - BroadcastOpConverter, - ShapeEqOpConverter, - ReduceOpConverter, - ShapeOfOpConverter>(ctx); - // clang-format on -} - -std::unique_ptr mlir::createConvertShapeToSCFPass() { - return std::make_unique(); -} diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp index e92bb83d4f424..8c917e08f942c 100644 --- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp +++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp @@ -12,10 +12,12 @@ #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/Shape/IR/Shape.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/BlockAndValueMapping.h" #include "mlir/Transforms/DialectConversion.h" using namespace mlir; using namespace mlir::shape; +using namespace mlir::scf; /// Conversion patterns. 
namespace { @@ -63,67 +65,94 @@ class BinaryOpConversion : public OpConversionPattern { } // namespace namespace { -class ConstSizeOpConversion : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(ConstSizeOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp(op, op.value().getSExtValue()); - return success(); - } -}; -} // namespace - -namespace { -class ShapeOfOpConversion : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; +struct BroadcastOpConverter : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(ShapeOfOp op, ArrayRef operands, + matchAndRewrite(BroadcastOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override; }; } // namespace -LogicalResult ShapeOfOpConversion::matchAndRewrite( - ShapeOfOp op, ArrayRef operands, +LogicalResult BroadcastOpConverter::matchAndRewrite( + BroadcastOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const { - - // For now, only error-free types are supported by this lowering. + // For now, this lowering is only defined on `tensor` operands, not + // on shapes. if (op.getType().isa()) return failure(); - // For unranked tensors `shape_of` lowers to `scf` and the pattern can be - // found in the corresponding pass. - ShapeOfOp::Adaptor transformed(operands); - Value tensorVal = transformed.arg(); - Type tensorTy = tensorVal.getType(); - if (tensorTy.isa()) - return failure(); - - // Build values for individual dimensions. 
- SmallVector dimValues; - RankedTensorType rankedTensorTy = tensorTy.cast(); - int64_t rank = rankedTensorTy.getRank(); + assert(!op.lhs().getType().isa() && + !op.rhs().getType().isa()); auto loc = op.getLoc(); - for (int64_t i = 0; i < rank; i++) { - if (rankedTensorTy.isDynamicDim(i)) { - Value dimVal = rewriter.create(loc, tensorVal, i); - dimValues.push_back(dimVal); - } else { - int64_t dim = rankedTensorTy.getDimSize(i); - Value dimVal = rewriter.create(loc, dim); - dimValues.push_back(dimVal); - } - } - - // Materialize extent tensor. - Value staticExtentTensor = - rewriter.create(loc, dimValues); - rewriter.replaceOpWithNewOp(op, staticExtentTensor, - op.getType()); + BroadcastOp::Adaptor transformed(operands); + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + + // Find smaller and greater rank and extent tensor. + Value lhsRank = rewriter.create(loc, transformed.lhs(), zero); + Value rhsRank = rewriter.create(loc, transformed.rhs(), zero); + Value lhsSmaller = + rewriter.create(loc, CmpIPredicate::ule, lhsRank, rhsRank); + Type indexTy = rewriter.getIndexType(); + Type extentTensorTy = op.getType(); + auto ifOp = rewriter.create( + loc, TypeRange{indexTy, extentTensorTy, indexTy, extentTensorTy}, + lhsSmaller, + [&](OpBuilder &b, Location loc) { + b.create(loc, ValueRange{lhsRank, transformed.lhs(), + rhsRank, transformed.rhs()}); + }, + [&](OpBuilder &b, Location loc) { + b.create(loc, ValueRange{rhsRank, transformed.rhs(), + lhsRank, transformed.lhs()}); + }); + Value smallerRank = ifOp.getResult(0); + Value smallerOperand = ifOp.getResult(1); + Value greaterRank = ifOp.getResult(2); + Value greaterOperand = ifOp.getResult(3); + + // Allocate stack memory for the broadcasted extent tensor. + Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); + Value mem = rewriter.create(loc, memTy, ValueRange{greaterRank}); + + // Copy extents from greater operand that are not challenged. 
+ Value rankDiff = + rewriter.create(loc, indexTy, greaterRank, smallerRank); + rewriter.create(loc, zero, rankDiff, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange) { + Value extent = b.create( + loc, greaterOperand, ValueRange{iv}); + b.create(loc, extent, mem, ValueRange{iv}); + b.create(loc); + }); + + // Determine remaining broadcasted extents. + rewriter.create( + loc, rankDiff, greaterRank, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange) { + Value greaterOperandExtent = + b.create(loc, greaterOperand, ValueRange{iv}); + Value greaterOperandExtentIsOne = + b.create(loc, CmpIPredicate::eq, greaterOperandExtent, one); + auto ifOp = b.create( + loc, TypeRange{indexTy}, greaterOperandExtentIsOne, + [&](OpBuilder &b, Location loc) { + Value ivShifted = b.create(loc, indexTy, iv, rankDiff); + Value smallerOperandExtent = b.create( + loc, smallerOperand, ValueRange{ivShifted}); + b.create(loc, smallerOperandExtent); + }, + [&](OpBuilder &b, Location loc) { + b.create(loc, greaterOperandExtent); + }); + Value extent = ifOp.getResult(0); + b.create(loc, extent, mem, ValueRange{iv}); + b.create(loc); + }); + + // Load broadcasted shape as an extent tensor. 
+ rewriter.replaceOpWithNewOp(op, mem); return success(); } @@ -161,26 +190,23 @@ LogicalResult ConstShapeOpConverter::matchAndRewrite( } namespace { -class ToExtentTensorOpConversion - : public OpConversionPattern { +class ConstSizeOpConversion : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(ToExtentTensorOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - ToExtentTensorOpAdaptor adaptor(operands); - - if (!adaptor.input().getType().isa()) - return rewriter.notifyMatchFailure(op, "input needs to be a tensor"); - - rewriter.replaceOpWithNewOp(op, adaptor.input(), - op.getType()); - return success(); - } + matchAndRewrite(ConstSizeOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; }; } // namespace +LogicalResult ConstSizeOpConversion::matchAndRewrite( + ConstSizeOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + rewriter.replaceOpWithNewOp(op, op.value().getSExtValue()); + return success(); +} + namespace { class GetExtentOpConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -239,6 +265,236 @@ RankOpConverter::matchAndRewrite(shape::RankOp op, ArrayRef operands, return success(); } +namespace { +/// Converts `shape.reduce` to `scf.for`. +struct ReduceOpConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(shape::ReduceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final; +}; +} // namespace + +LogicalResult +ReduceOpConverter::matchAndRewrite(shape::ReduceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering is only defined on `tensor` operands. 
+ if (op.shape().getType().isa()) + return failure(); + + auto loc = op.getLoc(); + shape::ReduceOp::Adaptor transformed(operands); + + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + Type indexTy = rewriter.getIndexType(); + Value rank = rewriter.create(loc, indexTy, transformed.shape(), zero); + + auto loop = rewriter.create( + loc, zero, rank, one, op.initVals(), + [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { + Value extent = b.create(loc, transformed.shape(), iv); + + SmallVector mappedValues{iv, extent}; + mappedValues.append(args.begin(), args.end()); + + BlockAndValueMapping mapping; + Block *reduceBody = op.getBody(); + mapping.map(reduceBody->getArguments(), mappedValues); + for (auto &nested : reduceBody->without_terminator()) + b.clone(nested, mapping); + + SmallVector mappedResults; + for (auto result : reduceBody->getTerminator()->getOperands()) + mappedResults.push_back(mapping.lookup(result)); + b.create(loc, mappedResults); + }); + + rewriter.replaceOp(op, loop.getResults()); + return success(); +} + +namespace { +/// Converts `shape.shape_eq` to an `scf.for` loop. For now, the lowering is +/// only defined on `tensor` operands. The test for equality first +/// compares their size and, if equal, checks every extent for equality. 
+/// +/// Example: +/// +/// %result = shape.shape_eq %a, %b : tensor, tensor +/// +/// becomes +/// +/// %c0 = constant 0 : index +/// %0 = dim %arg0, %c0 : tensor +/// %1 = dim %arg1, %c0 : tensor +/// %2 = cmpi "eq", %0, %1 : index +/// %result = scf.if %2 -> (i1) { +/// %c1 = constant 1 : index +/// %true = constant true +/// %4 = scf.for %arg2 = %c0 to %0 step %c1 iter_args(%arg3 = %true) -> (i1) { +/// %5 = extract_element %arg0[%arg2] : tensor +/// %6 = extract_element %arg1[%arg2] : tensor +/// %7 = cmpi "eq", %5, %6 : index +/// %8 = and %arg3, %7 : i1 +/// scf.yield %8 : i1 +/// } +/// scf.yield %4 : i1 +/// } else { +/// %false = constant false +/// scf.yield %false : i1 +/// } +/// +struct ShapeEqOpConverter : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ShapeEqOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; +} // namespace + +LogicalResult +ShapeEqOpConverter::matchAndRewrite(ShapeEqOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering is only defined on `tensor` operands, not + // on shapes. 
+ if (op.lhs().getType().isa() || + op.rhs().getType().isa()) { + return failure(); + } + + ShapeEqOp::Adaptor transformed(operands); + auto loc = op.getLoc(); + Type indexTy = rewriter.getIndexType(); + Value zero = rewriter.create(loc, 0); + Value lhsRank = rewriter.create(loc, indexTy, transformed.lhs(), zero); + Value rhsRank = rewriter.create(loc, indexTy, transformed.rhs(), zero); + Value eqRank = + rewriter.create(loc, CmpIPredicate::eq, lhsRank, rhsRank); + Type i1Ty = rewriter.getI1Type(); + rewriter.replaceOpWithNewOp( + op, i1Ty, eqRank, + [&](OpBuilder &b, Location loc) { + Value one = b.create(loc, 1); + Value init = b.create(loc, i1Ty, b.getBoolAttr(true)); + auto loop = b.create( + loc, zero, lhsRank, one, ValueRange{init}, + [&](OpBuilder &b, Location nestedLoc, Value iv, ValueRange args) { + Value conj = args[0]; + Value lhsExtent = + b.create(loc, transformed.lhs(), iv); + Value rhsExtent = + b.create(loc, transformed.rhs(), iv); + Value eqExtent = b.create(loc, CmpIPredicate::eq, + lhsExtent, rhsExtent); + Value conjNext = b.create(loc, conj, eqExtent); + b.create(loc, ValueRange({conjNext})); + }); + b.create(loc, loop.getResults()); + }, + [&](OpBuilder &b, Location loc) { + Value result = b.create(loc, i1Ty, b.getBoolAttr(false)); + b.create(loc, result); + }); + return success(); +} + +namespace { +class ShapeOfOpConversion : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ShapeOfOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; +} // namespace + +LogicalResult ShapeOfOpConversion::matchAndRewrite( + ShapeOfOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + + // For now, only error-free types are supported by this lowering. + if (op.getType().isa()) + return failure(); + + // For ranked tensor arguments, lower to `tensor_from_elements`. 
+ ShapeOfOp::Adaptor transformed(operands); + Value tensor = transformed.arg(); + Type tensorTy = tensor.getType(); + if (tensorTy.isa()) { + + // Build values for individual extents. + SmallVector extentValues; + RankedTensorType rankedTensorTy = tensorTy.cast(); + int64_t rank = rankedTensorTy.getRank(); + auto loc = op.getLoc(); + for (int64_t i = 0; i < rank; i++) { + if (rankedTensorTy.isDynamicDim(i)) { + Value extent = rewriter.create(loc, tensor, i); + extentValues.push_back(extent); + } else { + Value extent = + rewriter.create(loc, rankedTensorTy.getDimSize(i)); + extentValues.push_back(extent); + } + } + + // Materialize extent tensor. + Value staticExtentTensor = + rewriter.create(loc, extentValues); + rewriter.replaceOpWithNewOp(op, staticExtentTensor, + op.getType()); + return success(); + } + + // Allocate stack memory. + auto loc = op.getLoc(); + Value rank = rewriter.create(loc, tensor); + Type indexTy = rewriter.getIndexType(); + Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); + Value mem = rewriter.create(loc, memTy, ValueRange{rank}); + + // Copy shape extents to stack-allocated memory. + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + rewriter.create( + loc, zero, rank, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { + Value dim = rewriter.create(loc, tensor, iv); + rewriter.create(loc, dim, mem, ValueRange{iv}); + rewriter.create(loc); + }); + + // Load extents to tensor value. 
+ rewriter.replaceOpWithNewOp(op.getOperation(), mem); + return success(); +} + +namespace { +class ToExtentTensorOpConversion + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ToExtentTensorOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + ToExtentTensorOpAdaptor adaptor(operands); + + if (!adaptor.input().getType().isa()) + return rewriter.notifyMatchFailure(op, "input needs to be a tensor"); + + rewriter.replaceOpWithNewOp(op, adaptor.input(), + op.getType()); + return success(); + } +}; +} // namespace + namespace { /// Conversion pass. class ConvertShapeToStandardPass @@ -252,7 +508,7 @@ void ConvertShapeToStandardPass::runOnOperation() { // Setup target legality. MLIRContext &ctx = getContext(); ConversionTarget target(ctx); - target.addLegalDialect(); + target.addLegalDialect(); target.addLegalOp(); // Setup conversion patterns. @@ -271,11 +527,14 @@ void mlir::populateShapeToStandardConversionPatterns( patterns.insert< AnyOpConversion, BinaryOpConversion, - ConstShapeOpConverter, BinaryOpConversion, + BroadcastOpConverter, + ConstShapeOpConverter, ConstSizeOpConversion, GetExtentOpConverter, RankOpConverter, + ReduceOpConverter, + ShapeEqOpConverter, ShapeOfOpConversion, ToExtentTensorOpConversion>(ctx); // clang-format on diff --git a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir b/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir deleted file mode 100644 index cc384496dff05..0000000000000 --- a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir +++ /dev/null @@ -1,132 +0,0 @@ -// RUN: mlir-opt -convert-shape-to-scf -split-input-file %s | FileCheck %s - -// CHECK-LABEL: @shape_reduce -// CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index -func @shape_reduce(%shape : tensor) -> index { - %init = constant 1 : index - %num_elements = shape.reduce(%shape, %init) : tensor -> index { - ^bb0(%index : index, %extent : index, %acc: index): - %new_acc = muli 
%acc, %extent : index - shape.yield %new_acc : index - } - return %num_elements : index -} -// CHECK-NEXT: %[[INIT:.*]] = constant 1 : index -// CHECK-NEXT: %[[C0:.*]] = constant 0 : index -// CHECK-NEXT: %[[C1:.*]] = constant 1 : index -// CHECK-NEXT: %[[RANK:.*]] = dim %[[SHAPE]], %[[C0]] : tensor -// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) -> (index) -// CHECK-NEXT: %[[EXTENT:.*]] = extract_element %[[SHAPE]][%[[I]]] -// CHECK-NEXT: %[[NEW_ACC:.*]] = muli %[[ACC]], %[[EXTENT]] : index -// CHECK-NEXT: scf.yield %[[NEW_ACC]] : index -// CHECK-NEXT: } -// CHECK-NEXT: return %[[RESULT]] : index - -// ----- - -// Don't lower `shape_of` for result type of `shape.shape`. -// CHECK-LABEL: @shape_of -// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) -func @shape_of(%arg : tensor<*xf32>) { - // CHECK: shape.shape - %shape = shape.shape_of %arg : tensor<*xf32> -> !shape.shape - return -} - -// ----- - -// Lower `shape_of` for unranked tensors. 
-// CHECK-LABEL: @shape_of_unranked -// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) -func @shape_of_unranked(%arg : tensor<*xf32>) { - // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> - // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { - // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> - // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref - // CHECK: } - // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref - %shape = shape.shape_of %arg : tensor<*xf32> -> tensor - return -} - -// ----- - -// CHECK-LABEL: @shape_eq -// CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor) -> i1 -func @shape_eq(%a : tensor, %b : tensor) -> i1 { - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[RANK_A:.*]] = dim %[[A]], %[[C0]] : tensor - // CHECK: %[[RANK_B:.*]] = dim %[[B]], %[[C0]] : tensor - // CHECK: %[[RANK_EQ:.*]] = cmpi "eq", %[[RANK_A]], %[[RANK_B]] - // CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) { - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: %[[INIT:.*]] = constant true - // CHECK: %[[SHAPE_EQ_INNER:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK_A]] step %[[C1]] iter_args(%[[CONJ:.*]] = %[[INIT]]) -> (i1) { - // CHECK: %[[EXTENT_A:.*]] = extract_element %[[A]][%[[I]]] : tensor - // CHECK: %[[EXTENT_B:.*]] = extract_element %[[B]][%[[I]]] : tensor - // CHECK: %[[EXTENT_EQ:.*]] = cmpi "eq", %[[EXTENT_A]], %[[EXTENT_B]] - // CHECK: %[[CONJ_NEXT:.*]] = and %[[CONJ]], %[[EXTENT_EQ]] - // CHECK: scf.yield %[[CONJ_NEXT]] : i1 - // CHECK: } - // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 - // CHECK: } else { - // CHECK: %[[SHAPE_EQ_INNER:.*]] = constant false - // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 - // CHECK: } - // CHECK: return %[[SHAPE_EQ]] : i1 - %result = shape.shape_eq %a, %b : tensor, tensor - return %result : i1 -} - -// ----- - -// Don't lower 
`shape.broadcast` if a `shape.shape` type is involved. -// CHECK-LABEL: @broadcast -func @broadcast(%a : tensor, %b : !shape.shape) -> !shape.shape { - // CHECK: shape.broadcast - %c = shape.broadcast %a, %b : tensor, !shape.shape -> !shape.shape - return %c : !shape.shape -} - -// ----- - -// CHECK-LABEL: @broadcast -// CHECK-SAME: (%[[LHS:.*]]: tensor, %[[RHS:.*]]: tensor) -func @broadcast(%a : tensor, %b : tensor) { - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor - // CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor - // CHECK: %[[LHS_SMALLER:.*]] = cmpi "ule", %[[LHS_RANK]], %[[RHS_RANK]] - // CHECK: %[[ARG:.*]]:4 = scf.if %[[LHS_SMALLER]] -> (index, tensor, index, tensor) { - // CHECK: scf.yield %[[LHS_RANK]], %[[LHS]], %[[RHS_RANK]], %[[RHS]] : index, tensor, index, tensor - // CHECK: } else { - // CHECK: scf.yield %[[RHS_RANK]], %[[RHS]], %[[LHS_RANK]], %[[LHS]] : index, tensor, index, tensor - // CHECK: } - // CHECK: %[[MEM:.*]] = alloca(%[[ARG]]#2) : memref - // CHECK: %[[RANK_DIFF:.*]] = subi %[[ARG]]#2, %[[ARG]]#0 : index - // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[RANK_DIFF]] step %[[C1]] { - // CHECK: %[[EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor - // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref - // CHECK: } - // CHECK: scf.for %[[IV:.*]] = %[[RANK_DIFF]] to %[[ARG]]#2 step %[[C1]] { - // CHECK: %[[GREATER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor - // CHECK: %[[GREATER_OPERAND_EXTENT_IS_ONE:.*]] = cmpi "eq", %[[GREATER_OPERAND_EXTENT]], %[[C1]] : index - // CHECK: %[[EXTENT:.*]] = scf.if %[[GREATER_OPERAND_EXTENT_IS_ONE]] -> (index) { - // CHECK: %[[IV_SHIFTED:.*]] = subi %[[IV]], %[[RANK_DIFF]] : index - // CHECK: %[[SMALLER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#1[%[[IV_SHIFTED]]] : tensor - // CHECK: scf.yield %[[SMALLER_OPERAND_EXTENT]] : index - // CHECK: } else { - // CHECK: 
scf.yield %[[GREATER_OPERAND_EXTENT]] : index - // CHECK: } - // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref - // CHECK: } - // CHECK: %[[BROADCASTED:.*]] = tensor_load %[[MEM]] : memref - %0 = shape.broadcast %a, %b - : tensor, tensor -> tensor - return -} - diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index b0fb5bac9071b..bf8e74e5143ed 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -26,46 +26,6 @@ func @binary_ops_on_size(%lhs : !shape.size, %rhs : !shape.size) { // ----- -// Don't lower `shape_of` with `shape.shape` type. -// CHECK-LABEL: @shape_of -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) -func @shape_of_stat(%arg : tensor<1x2x3xf32>) { - // CHECK: shape.shape_of %[[ARG]] : tensor<1x2x3xf32> -> !shape.shape - %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> !shape.shape - return -} - -// ----- - -// Lower `shape_of` for statically shaped tensor. -// CHECK-LABEL: @shape_of_stat -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) -func @shape_of_stat(%arg : tensor<1x2x3xf32>) { - // CHECK-DAG: %[[C1:.*]] = constant 1 : index - // CHECK-DAG: %[[C2:.*]] = constant 2 : index - // CHECK-DAG: %[[C3:.*]] = constant 3 : index - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> - %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor - return -} - -// ----- - -// Lower `shape_of` for dynamically shaped tensor. 
-// CHECK-LABEL: @shape_of_dyn -// CHECK-SAME: (%[[ARG:.*]]: tensor<1x5x?xf32>) -func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { - // CHECK-DAG: %[[C1:.*]] = constant 1 : index - // CHECK-DAG: %[[C5:.*]] = constant 5 : index - // CHECK-DAG: %[[C2:.*]] = constant 2 : index - // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> - %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor - return -} - -// ----- - // Convert `rank` to `dim` of the first dimension. // CHECK-LABEL: @rank // CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index @@ -190,3 +150,174 @@ func @to_extent_tensor(%arg: tensor) -> tensor<3xindex> { // CHECK: return %[[RES]] return %casted : tensor<3xindex> } + +// CHECK-LABEL: @shape_reduce +// CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index +func @shape_reduce(%shape : tensor) -> index { + %init = constant 1 : index + %num_elements = shape.reduce(%shape, %init) : tensor -> index { + ^bb0(%index : index, %extent : index, %acc: index): + %new_acc = muli %acc, %extent : index + shape.yield %new_acc : index + } + return %num_elements : index +} +// CHECK-NEXT: %[[INIT:.*]] = constant 1 : index +// CHECK-NEXT: %[[C0:.*]] = constant 0 : index +// CHECK-NEXT: %[[C1:.*]] = constant 1 : index +// CHECK-NEXT: %[[RANK:.*]] = dim %[[SHAPE]], %[[C0]] : tensor +// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) -> (index) +// CHECK-NEXT: %[[EXTENT:.*]] = extract_element %[[SHAPE]][%[[I]]] +// CHECK-NEXT: %[[NEW_ACC:.*]] = muli %[[ACC]], %[[EXTENT]] : index +// CHECK-NEXT: scf.yield %[[NEW_ACC]] : index +// CHECK-NEXT: } +// CHECK-NEXT: return %[[RESULT]] : index + +// ----- + +// Don't lower `shape_of` for result type of `shape.shape`. 
+// CHECK-LABEL: @shape_of +// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) +func @shape_of(%arg : tensor<*xf32>) { + // CHECK: shape.shape + %shape = shape.shape_of %arg : tensor<*xf32> -> !shape.shape + return +} + +// ----- + +// Lower `shape_of` for unranked tensors. +// CHECK-LABEL: @shape_of_unranked +// CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) +func @shape_of_unranked(%arg : tensor<*xf32>) { + // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> + // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { + // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> + // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref + // CHECK: } + // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref + %shape = shape.shape_of %arg : tensor<*xf32> -> tensor + return +} + +// ----- + +// Don't lower `shape_of` with `shape.shape` type. +// CHECK-LABEL: @shape_of +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) +func @shape_of_stat(%arg : tensor<1x2x3xf32>) { + // CHECK: shape.shape_of %[[ARG]] : tensor<1x2x3xf32> -> !shape.shape + %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> !shape.shape + return +} + +// ----- + +// Lower `shape_of` for statically shaped tensor. +// CHECK-LABEL: @shape_of_stat +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x2x3xf32>) +func @shape_of_stat(%arg : tensor<1x2x3xf32>) { + // CHECK-DAG: %[[C1:.*]] = constant 1 : index + // CHECK-DAG: %[[C2:.*]] = constant 2 : index + // CHECK-DAG: %[[C3:.*]] = constant 3 : index + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> + %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor + return +} + +// ----- + +// Lower `shape_of` for dynamically shaped tensor. 
+// CHECK-LABEL: @shape_of_dyn +// CHECK-SAME: (%[[ARG:.*]]: tensor<1x5x?xf32>) +func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { + // CHECK-DAG: %[[C1:.*]] = constant 1 : index + // CHECK-DAG: %[[C5:.*]] = constant 5 : index + // CHECK-DAG: %[[C2:.*]] = constant 2 : index + // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> + %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor + return +} + +// ----- + +// CHECK-LABEL: @shape_eq +// CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor) -> i1 +func @shape_eq(%a : tensor, %b : tensor) -> i1 { + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[RANK_A:.*]] = dim %[[A]], %[[C0]] : tensor + // CHECK: %[[RANK_B:.*]] = dim %[[B]], %[[C0]] : tensor + // CHECK: %[[RANK_EQ:.*]] = cmpi "eq", %[[RANK_A]], %[[RANK_B]] + // CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) { + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: %[[INIT:.*]] = constant true + // CHECK: %[[SHAPE_EQ_INNER:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK_A]] step %[[C1]] iter_args(%[[CONJ:.*]] = %[[INIT]]) -> (i1) { + // CHECK: %[[EXTENT_A:.*]] = extract_element %[[A]][%[[I]]] : tensor + // CHECK: %[[EXTENT_B:.*]] = extract_element %[[B]][%[[I]]] : tensor + // CHECK: %[[EXTENT_EQ:.*]] = cmpi "eq", %[[EXTENT_A]], %[[EXTENT_B]] + // CHECK: %[[CONJ_NEXT:.*]] = and %[[CONJ]], %[[EXTENT_EQ]] + // CHECK: scf.yield %[[CONJ_NEXT]] : i1 + // CHECK: } + // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 + // CHECK: } else { + // CHECK: %[[SHAPE_EQ_INNER:.*]] = constant false + // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1 + // CHECK: } + // CHECK: return %[[SHAPE_EQ]] : i1 + %result = shape.shape_eq %a, %b : tensor, tensor + return %result : i1 +} + +// ----- + +// Don't lower `shape.broadcast` if a `shape.shape` type is involved. 
+// CHECK-LABEL: @broadcast +func @broadcast(%a : tensor, %b : !shape.shape) -> !shape.shape { + // CHECK: shape.broadcast + %c = shape.broadcast %a, %b : tensor, !shape.shape -> !shape.shape + return %c : !shape.shape +} + +// ----- + +// CHECK-LABEL: @broadcast +// CHECK-SAME: (%[[LHS:.*]]: tensor, %[[RHS:.*]]: tensor) +func @broadcast(%a : tensor, %b : tensor) { + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor + // CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor + // CHECK: %[[LHS_SMALLER:.*]] = cmpi "ule", %[[LHS_RANK]], %[[RHS_RANK]] + // CHECK: %[[ARG:.*]]:4 = scf.if %[[LHS_SMALLER]] -> (index, tensor, index, tensor) { + // CHECK: scf.yield %[[LHS_RANK]], %[[LHS]], %[[RHS_RANK]], %[[RHS]] : index, tensor, index, tensor + // CHECK: } else { + // CHECK: scf.yield %[[RHS_RANK]], %[[RHS]], %[[LHS_RANK]], %[[LHS]] : index, tensor, index, tensor + // CHECK: } + // CHECK: %[[MEM:.*]] = alloca(%[[ARG]]#2) : memref + // CHECK: %[[RANK_DIFF:.*]] = subi %[[ARG]]#2, %[[ARG]]#0 : index + // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[RANK_DIFF]] step %[[C1]] { + // CHECK: %[[EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor + // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref + // CHECK: } + // CHECK: scf.for %[[IV:.*]] = %[[RANK_DIFF]] to %[[ARG]]#2 step %[[C1]] { + // CHECK: %[[GREATER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor + // CHECK: %[[GREATER_OPERAND_EXTENT_IS_ONE:.*]] = cmpi "eq", %[[GREATER_OPERAND_EXTENT]], %[[C1]] : index + // CHECK: %[[EXTENT:.*]] = scf.if %[[GREATER_OPERAND_EXTENT_IS_ONE]] -> (index) { + // CHECK: %[[IV_SHIFTED:.*]] = subi %[[IV]], %[[RANK_DIFF]] : index + // CHECK: %[[SMALLER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#1[%[[IV_SHIFTED]]] : tensor + // CHECK: scf.yield %[[SMALLER_OPERAND_EXTENT]] : index + // CHECK: } else { + // CHECK: scf.yield %[[GREATER_OPERAND_EXTENT]] : index + // CHECK: 
} + // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref + // CHECK: } + // CHECK: %[[BROADCASTED:.*]] = tensor_load %[[MEM]] : memref + %0 = shape.broadcast %a, %b + : tensor, tensor -> tensor + return +} + From b22910daab95be1ebc6ab8a74190e38130b0e6ef Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 10:26:42 -0400 Subject: [PATCH 361/465] [InstCombine] erase instructions leading up to unreachable Normal dead code elimination ignores assume intrinsics, so we fail to delete assumes that are not meaningful (and potentially worse if they cause conflicts with other assumptions). The motivating example in https://llvm.org/PR47416 suggests that we might have problems upstream from here (difference between C and C++), but this should be a cheap way to make sure we remove more dead code. Differential Revision: https://reviews.llvm.org/D87149 --- .../Transforms/InstCombine/InstCombineInternal.h | 1 + .../Transforms/InstCombine/InstructionCombining.cpp | 13 +++++++++++++ llvm/test/Transforms/InstCombine/assume.ll | 10 +--------- .../Transforms/InstCombine/pr33689_same_bitwidth.ll | 2 -- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index a03cb5e470511..62ee7d00780ef 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -159,6 +159,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *visitFenceInst(FenceInst &FI); Instruction *visitSwitchInst(SwitchInst &SI); Instruction *visitReturnInst(ReturnInst &RI); + Instruction *visitUnreachableInst(UnreachableInst &I); Instruction * foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI); Instruction *visitInsertValueInst(InsertValueInst &IV); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 
178e9a4a17bdd..0ca256860c596 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2798,6 +2798,19 @@ Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { return nullptr; } +Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { + // Try to remove the previous instruction if it must lead to unreachable. + // This includes instructions like stores and "llvm.assume" that may not get + // removed by simple dead code elimination. + Instruction *Prev = I.getPrevNonDebugInstruction(); + if (Prev && !Prev->isEHPad() && + isGuaranteedToTransferExecutionToSuccessor(Prev)) { + eraseInstFromFunction(*Prev); + return &I; + } + return nullptr; +} + Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { assert(BI.isUnconditional() && "Only for unconditional branches."); diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index b55b1c21c0b9c..8ca24caa2aa1b 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -S -instcombine-infinite-loop-threshold=2 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -543,7 +543,6 @@ define i8 @conflicting_assumptions(i8 %x){ define void @PR36270(i32 %b) { ; CHECK-LABEL: @PR36270( -; CHECK-NEXT: tail call void @llvm.assume(i1 false) ; CHECK-NEXT: unreachable ; %B7 = xor i32 -1, 2147483647 @@ -573,8 +572,6 @@ define i32 @unreachable_assume(i32 %x, i32 %y) { ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP3]]) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[X]], 2 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP4]]) ; CHECK-NEXT: 
unreachable ; entry: @@ -612,11 +609,6 @@ define i32 @unreachable_assumes_and_store(i32 %x, i32 %y, i32* %p) { ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP3]]) ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[X]], 2 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[Y]], 42 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP5]]) -; CHECK-NEXT: store i32 [[X]], i32* [[P:%.*]], align 4 ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll index 0ffd41d60a4c8..d533703ac6fbe 100644 --- a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll +++ b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll @@ -17,8 +17,6 @@ define void @f(i1 %cond) { ; CHECK-NEXT: [[T12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[T12]], i16 0, i16 0 ; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[T8:%.*]] = ptrtoint [2 x i32]* [[T12]] to i16 -; CHECK-NEXT: store i16 [[T8]], i16* @a, align 2 ; CHECK-NEXT: unreachable ; CHECK: bb2: ; CHECK-NEXT: [[T9:%.*]] = load i16*, i16** @b, align 2 From 60162626a5c963125a2e7012b621c7ba0b57855e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 15:31:54 +0100 Subject: [PATCH 362/465] [X86] Replace UpgradeX86AddSubSatIntrinsics with UpgradeX86BinaryIntrinsics generic helper. NFCI. Feed the Intrinsic::ID value directly instead of via the IsSigned/IsAddition bool flags. 
--- llvm/lib/IR/AutoUpgrade.cpp | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 5ec3eb85d99d4..12286264c81df 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1208,15 +1208,11 @@ static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI, return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru); } -static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI, - bool IsSigned, bool IsAddition) { +static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI, + Intrinsic::ID IID) { Type *Ty = CI.getType(); Value *Op0 = CI.getOperand(0); Value *Op1 = CI.getOperand(1); - - Intrinsic::ID IID = - IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) - : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat); Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); Value *Res = Builder.CreateCall(Intrin, {Op0, Op1}); @@ -2486,23 +2482,23 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); } else if (IsX86 && (Name.startswith("sse2.padds.") || - Name.startswith("sse2.psubs.") || Name.startswith("avx2.padds.") || - Name.startswith("avx2.psubs.") || Name.startswith("avx512.padds.") || + Name.startswith("avx512.mask.padds."))) { + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat); + } else if (IsX86 && (Name.startswith("sse2.psubs.") || + Name.startswith("avx2.psubs.") || Name.startswith("avx512.psubs.") || - Name.startswith("avx512.mask.padds.") || Name.startswith("avx512.mask.psubs."))) { - bool IsAdd = Name.contains(".padds"); - Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd); + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat); } else if (IsX86 && (Name.startswith("sse2.paddus.") || - 
Name.startswith("sse2.psubus.") || Name.startswith("avx2.paddus.") || + Name.startswith("avx512.mask.paddus."))) { + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat); + } else if (IsX86 && (Name.startswith("sse2.psubus.") || Name.startswith("avx2.psubus.") || - Name.startswith("avx512.mask.paddus.") || Name.startswith("avx512.mask.psubus."))) { - bool IsAdd = Name.contains(".paddus"); - Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd); + Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat); } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) { Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), CI->getArgOperand(1), From 7993431dad064d03244f32c9585325b891f3e807 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 15:53:15 +0100 Subject: [PATCH 363/465] [X86][SSE] Add test cases for PR47448 --- llvm/test/CodeGen/X86/vector-unsigned-cmp.ll | 92 ++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll index 3bbf4f20d60af..c7707297c50b4 100644 --- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll @@ -451,3 +451,95 @@ define <16 x i1> @ule_v16i8(<16 x i8> %x, <16 x i8> %y) { ret <16 x i1> %cmp } +define <8 x i16> @PR47448_uge(i16 signext %0) { +; SSE2-LABEL: PR47448_uge: +; SSE2: # %bb.0: +; SSE2-NEXT: andl $7, %edi +; SSE2-NEXT: movd %edi, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7] +; SSE2-NEXT: psubusw %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: PR47448_uge: +; SSE41: # %bb.0: +; SSE41-NEXT: andl $7, %edi +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = 
xmm0[0,0,0,0] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7] +; SSE41-NEXT: pmaxuw %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: PR47448_uge: +; AVX1: # %bb.0: +; AVX1-NEXT: andl $7, %edi +; AVX1-NEXT: vmovd %edi, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR47448_uge: +; AVX2: # %bb.0: +; AVX2-NEXT: andl $7, %edi +; AVX2-NEXT: vmovd %edi, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq + %2 = and i16 %0, 7 + %3 = insertelement <8 x i16> undef, i16 %2, i32 0 + %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer + %5 = icmp uge <8 x i16> %4, + %6 = sext <8 x i1> %5 to <8 x i16> + ret <8 x i16> %6 +} + +define <8 x i16> @PR47448_ugt(i16 signext %0) { +; SSE-LABEL: PR47448_ugt: +; SSE: # %bb.0: +; SSE-NEXT: andl $7, %edi +; SSE-NEXT: movd %edi, %xmm0 +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] +; SSE-NEXT: psubusw %xmm0, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: PR47448_ugt: +; AVX1: # %bb.0: +; AVX1-NEXT: andl $7, %edi +; AVX1-NEXT: vmovd %edi, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] +; AVX1-NEXT: vpsubusw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR47448_ugt: +; AVX2: # %bb.0: +; AVX2-NEXT: andl $7, %edi +; AVX2-NEXT: vmovd %edi, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] +; AVX2-NEXT: vpsubusw %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq + %2 = and i16 %0, 7 + %3 = insertelement <8 x i16> undef, i16 %2, i32 0 + %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer + %5 = icmp ugt <8 x i16> %4, + %6 = sext <8 x i1> %5 to <8 x i16> + ret <8 x i16> %6 +} From 9de0a3da6a76030f96a2d6793ca4f094fa538db5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 16:11:40 +0100 Subject: [PATCH 364/465] [X86][SSE] Don't use LowerVSETCCWithSUBUS for unsigned compare with +ve operands (PR47448) We already simplify the unsigned comparisons if we've found the operands are non-negative, but we were still calling LowerVSETCCWithSUBUS which resulted in the PR47448 regressions. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 ++-- llvm/test/CodeGen/X86/vector-unsigned-cmp.ll | 33 +++++++------------- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a8a41d9a1bb75..1395db57b57a0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -22731,8 +22731,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, } // Try to use SUBUS and PCMPEQ. - if (SDValue V = LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG)) - return V; + if (FlipSigns) + if (SDValue V = + LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG)) + return V; // We are handling one of the integer comparisons here. 
Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll index c7707297c50b4..d6849a94ddb91 100644 --- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll @@ -285,9 +285,9 @@ define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) { ; SSE2: # %bb.0: ; SSE2-NEXT: psrlw $1, %xmm0 ; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: psubusw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE2-NEXT: pcmpgtw %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: uge_v8i16: @@ -316,9 +316,9 @@ define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) { ; SSE2: # %bb.0: ; SSE2-NEXT: psrlw $1, %xmm0 ; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: psubusw %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE2-NEXT: pcmpgtw %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: ule_v8i16: @@ -459,9 +459,9 @@ define <8 x i16> @PR47448_uge(i16 signext %0) { ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7] -; SSE2-NEXT: psubusw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE2-NEXT: pcmpgtw %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: PR47448_uge: @@ -508,10 +508,7 @@ define <8 x i16> @PR47448_ugt(i16 signext %0) { ; SSE-NEXT: movd %edi, %xmm0 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE-NEXT: psubusw %xmm0, %xmm1 -; SSE-NEXT: pxor %xmm0, %xmm0 -; SSE-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE-NEXT: 
pcmpgtw {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: PR47448_ugt: @@ -520,10 +517,7 @@ define <8 x i16> @PR47448_ugt(i16 signext %0) { ; AVX1-NEXT: vmovd %edi, %xmm0 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; AVX1-NEXT: vpsubusw %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: PR47448_ugt: @@ -531,10 +525,7 @@ define <8 x i16> @PR47448_ugt(i16 signext %0) { ; AVX2-NEXT: andl $7, %edi ; AVX2-NEXT: vmovd %edi, %xmm0 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; AVX2-NEXT: vpsubusw %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq %2 = and i16 %0, 7 %3 = insertelement <8 x i16> undef, i16 %2, i32 0 From c4056f842827db97e9861ae92360202aa0863199 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 16:17:31 +0100 Subject: [PATCH 365/465] [Sparc] Add reduced funnel shift test case for PR47303 --- llvm/test/CodeGen/SPARC/fshl.ll | 48 +++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 llvm/test/CodeGen/SPARC/fshl.ll diff --git a/llvm/test/CodeGen/SPARC/fshl.ll b/llvm/test/CodeGen/SPARC/fshl.ll new file mode 100644 index 0000000000000..d841619457249 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/fshl.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s + +define <2 x i64> @fshl_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { +; CHECK-LABEL: fshl_v2i64: +; CHECK: .cfi_startproc +; CHECK-NEXT: .register %g2, #scratch +; CHECK-NEXT: .register %g3, #scratch +; CHECK-NEXT: ! %bb.0: ! 
%bb +; CHECK-NEXT: mov 63, %g2 +; CHECK-NEXT: andn %g2, %o4, %g3 +; CHECK-NEXT: srlx %o2, 1, %o2 +; CHECK-NEXT: srlx %o2, %g3, %o2 +; CHECK-NEXT: and %o4, 63, %o4 +; CHECK-NEXT: sllx %o0, %o4, %o0 +; CHECK-NEXT: or %o0, %o2, %o0 +; CHECK-NEXT: andn %g2, %o5, %o2 +; CHECK-NEXT: srlx %o3, 1, %o3 +; CHECK-NEXT: srlx %o3, %o2, %o2 +; CHECK-NEXT: and %o5, 63, %o3 +; CHECK-NEXT: sllx %o1, %o3, %o1 +; CHECK-NEXT: retl +; CHECK-NEXT: or %o1, %o2, %o1 +bb: + %i = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) + ret <2 x i64> %i +} + +define i32 @PR47303() { +; CHECK-LABEL: PR47303: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: ! %bb +; CHECK-NEXT: retl +; CHECK-NEXT: mov 0, %o0 +bb: + %i = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> ) + %i1 = add <4 x i64> %i, zeroinitializer + %i2 = add <4 x i64> %i1, zeroinitializer + %i3 = extractelement <4 x i64> %i2, i32 0 + %i4 = add i64 0, %i3 + %i5 = xor i64 0, %i4 + %i6 = trunc i64 %i5 to i32 + %i7 = mul i32 %i6, 797982799 + ret i32 %i7 +} + +declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) From 783d7116dc8b739263125c607ec034f9d580291e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 16:39:42 +0100 Subject: [PATCH 366/465] AntiDepBreaker.h - remove unnecessary ScheduleDAG.h include. NFCI. 
--- llvm/include/llvm/CodeGen/AntiDepBreaker.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/AntiDepBreaker.h b/llvm/include/llvm/CodeGen/AntiDepBreaker.h index d75c13e2dd756..0553d7d452a46 100644 --- a/llvm/include/llvm/CodeGen/AntiDepBreaker.h +++ b/llvm/include/llvm/CodeGen/AntiDepBreaker.h @@ -17,7 +17,6 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Compiler.h" #include From 2e1827271cb1c090cced7369282f9edcf9e59183 Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Mon, 7 Sep 2020 16:31:12 +0100 Subject: [PATCH 367/465] [flang] Fix link to old repo location in doxygen mainpage. NFC. --- flang/docs/doxygen-mainpage.dox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/docs/doxygen-mainpage.dox b/flang/docs/doxygen-mainpage.dox index 78469e31312cf..e4bc3f6602425 100644 --- a/flang/docs/doxygen-mainpage.dox +++ b/flang/docs/doxygen-mainpage.dox @@ -6,7 +6,7 @@ /// This documentation describes the **internal** software that makes /// up flang, not the **external** use of flang. There are no instructions /// here on how to use flang, only the APIs that make up the software. For -/// usage instructions, please see the [project website](https://github.com/flang-compiler/f18) +/// usage instructions, please see the [project website](https://github.com/llvm-project/flang) /// for further detail. 
/// /// \section main_caveat Caveat From d01280587d97eb02d37da37666afd3e4d57c9336 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Thu, 23 Jul 2020 16:57:16 +0200 Subject: [PATCH 368/465] [analyzer][StdLibraryFunctionsChecker] Add POSIX pthread handling functions Differential Revision: https://reviews.llvm.org/D84415 --- .../Checkers/StdLibraryFunctionsChecker.cpp | 95 +++++++++++++++++++ .../Analysis/std-c-library-functions-POSIX.c | 42 ++++++++ 2 files changed, 137 insertions(+) diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index ddde629f44a5c..b71c19a80da90 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -951,6 +951,8 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( const QualType ConstWchar_tPtrTy = getPointerTy(getConstTy(WCharTy)); // const wchar_t * const QualType ConstVoidPtrRestrictTy = getRestrictTy(ConstVoidPtrTy); + const QualType SizePtrTy = getPointerTy(SizeTy); + const QualType SizePtrRestrictTy = getRestrictTy(SizePtrTy); const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue(); const RangeInt UnsignedIntMax = @@ -2182,6 +2184,99 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( Summary(ArgTypes{IntTy, StructItimervalPtrTy}, RetType{IntTy}, NoEvalCall) .ArgConstraint(NotNull(ArgNo(1)))); + + Optional Pthread_cond_tTy = lookupTy("pthread_cond_t"); + Optional Pthread_cond_tPtrTy = getPointerTy(Pthread_cond_tTy); + Optional Pthread_tTy = lookupTy("pthread_t"); + Optional Pthread_tPtrTy = getPointerTy(Pthread_tTy); + Optional Pthread_tPtrRestrictTy = getRestrictTy(Pthread_tPtrTy); + Optional Pthread_mutex_tTy = lookupTy("pthread_mutex_t"); + Optional Pthread_mutex_tPtrTy = getPointerTy(Pthread_mutex_tTy); + Optional Pthread_mutex_tPtrRestrictTy = + getRestrictTy(Pthread_mutex_tPtrTy); + Optional Pthread_attr_tTy = 
lookupTy("pthread_attr_t"); + Optional Pthread_attr_tPtrTy = getPointerTy(Pthread_attr_tTy); + Optional ConstPthread_attr_tPtrTy = + getPointerTy(getConstTy(Pthread_attr_tTy)); + Optional ConstPthread_attr_tPtrRestrictTy = + getRestrictTy(ConstPthread_attr_tPtrTy); + Optional Pthread_mutexattr_tTy = lookupTy("pthread_mutexattr_t"); + Optional ConstPthread_mutexattr_tPtrTy = + getPointerTy(getConstTy(Pthread_mutexattr_tTy)); + Optional ConstPthread_mutexattr_tPtrRestrictTy = + getRestrictTy(ConstPthread_mutexattr_tPtrTy); + + QualType PthreadStartRoutineTy = getPointerTy( + ACtx.getFunctionType(/*ResultTy=*/VoidPtrTy, /*Args=*/VoidPtrTy, + FunctionProtoType::ExtProtoInfo())); + + // int pthread_cond_signal(pthread_cond_t *cond); + // int pthread_cond_broadcast(pthread_cond_t *cond); + addToFunctionSummaryMap( + {"pthread_cond_signal", "pthread_cond_broadcast"}, + Signature(ArgTypes{Pthread_cond_tPtrTy}, RetType{IntTy}), + Summary(NoEvalCall).ArgConstraint(NotNull(ArgNo(0)))); + + // int pthread_create(pthread_t *restrict thread, + // const pthread_attr_t *restrict attr, + // void *(*start_routine)(void*), void *restrict arg); + addToFunctionSummaryMap( + "pthread_create", + Signature(ArgTypes{Pthread_tPtrRestrictTy, + ConstPthread_attr_tPtrRestrictTy, + PthreadStartRoutineTy, VoidPtrRestrictTy}, + RetType{IntTy}), + Summary(NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(2)))); + + // int pthread_attr_destroy(pthread_attr_t *attr); + // int pthread_attr_init(pthread_attr_t *attr); + addToFunctionSummaryMap( + {"pthread_attr_destroy", "pthread_attr_init"}, + Signature(ArgTypes{Pthread_attr_tPtrTy}, RetType{IntTy}), + Summary(NoEvalCall).ArgConstraint(NotNull(ArgNo(0)))); + + // int pthread_attr_getstacksize(const pthread_attr_t *restrict attr, + // size_t *restrict stacksize); + // int pthread_attr_getguardsize(const pthread_attr_t *restrict attr, + // size_t *restrict guardsize); + addToFunctionSummaryMap( + 
{"pthread_attr_getstacksize", "pthread_attr_getguardsize"}, + Signature(ArgTypes{ConstPthread_attr_tPtrRestrictTy, SizePtrRestrictTy}, + RetType{IntTy}), + Summary(NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(1)))); + + // int pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize); + // int pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize); + addToFunctionSummaryMap( + {"pthread_attr_setstacksize", "pthread_attr_setguardsize"}, + Signature(ArgTypes{Pthread_attr_tPtrTy, SizeTy}, RetType{IntTy}), + Summary(NoEvalCall) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint( + ArgumentCondition(1, WithinRange, Range(0, SizeMax)))); + + // int pthread_mutex_init(pthread_mutex_t *restrict mutex, const + // pthread_mutexattr_t *restrict attr); + addToFunctionSummaryMap( + "pthread_mutex_init", + Signature(ArgTypes{Pthread_mutex_tPtrRestrictTy, + ConstPthread_mutexattr_tPtrRestrictTy}, + RetType{IntTy}), + Summary(NoEvalCall).ArgConstraint(NotNull(ArgNo(0)))); + + // int pthread_mutex_destroy(pthread_mutex_t *mutex); + // int pthread_mutex_lock(pthread_mutex_t *mutex); + // int pthread_mutex_trylock(pthread_mutex_t *mutex); + // int pthread_mutex_unlock(pthread_mutex_t *mutex); + addToFunctionSummaryMap( + {"pthread_mutex_destroy", "pthread_mutex_lock", "pthread_mutex_trylock", + "pthread_mutex_unlock"}, + Signature(ArgTypes{Pthread_mutex_tPtrTy}, RetType{IntTy}), + Summary(NoEvalCall).ArgConstraint(NotNull(ArgNo(0)))); } // Functions for testing. 
diff --git a/clang/test/Analysis/std-c-library-functions-POSIX.c b/clang/test/Analysis/std-c-library-functions-POSIX.c index d65e9f029b6be..c2c98df864899 100644 --- a/clang/test/Analysis/std-c-library-functions-POSIX.c +++ b/clang/test/Analysis/std-c-library-functions-POSIX.c @@ -108,6 +108,20 @@ // CHECK: Loaded summary for: struct tm *gmtime(const time_t *tp) // CHECK: Loaded summary for: int clock_gettime(clockid_t clock_id, struct timespec *tp) // CHECK: Loaded summary for: int getitimer(int which, struct itimerval *curr_value) +// CHECK: Loaded summary for: int pthread_cond_signal(pthread_cond_t *cond) +// CHECK: Loaded summary for: int pthread_cond_broadcast(pthread_cond_t *cond) +// CHECK: Loaded summary for: int pthread_create(pthread_t *restrict thread, const pthread_attr_t *restrict attr, void *(*start_routine)(void *), void *restrict arg) +// CHECK: Loaded summary for: int pthread_attr_destroy(pthread_attr_t *attr) +// CHECK: Loaded summary for: int pthread_attr_init(pthread_attr_t *attr) +// CHECK: Loaded summary for: int pthread_attr_getstacksize(const pthread_attr_t *restrict attr, size_t *restrict stacksize) +// CHECK: Loaded summary for: int pthread_attr_getguardsize(const pthread_attr_t *restrict attr, size_t *restrict guardsize) +// CHECK: Loaded summary for: int pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize) +// CHECK: Loaded summary for: int pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize) +// CHECK: Loaded summary for: int pthread_mutex_init(pthread_mutex_t *restrict mutex, const pthread_mutexattr_t *restrict attr) +// CHECK: Loaded summary for: int pthread_mutex_destroy(pthread_mutex_t *mutex) +// CHECK: Loaded summary for: int pthread_mutex_lock(pthread_mutex_t *mutex) +// CHECK: Loaded summary for: int pthread_mutex_trylock(pthread_mutex_t *mutex) +// CHECK: Loaded summary for: int pthread_mutex_unlock(pthread_mutex_t *mutex) long a64l(const char *str64); char *l64a(long value); @@ -259,6 +273,34 @@ int 
clock_gettime(clockid_t clock_id, struct timespec *tp); struct itimerval; int getitimer(int which, struct itimerval *curr_value); +typedef union { + int x; +} pthread_cond_t; +int pthread_cond_signal(pthread_cond_t *cond); +int pthread_cond_broadcast(pthread_cond_t *cond); +typedef union { + int x; +} pthread_attr_t; +typedef unsigned long int pthread_t; +int pthread_create(pthread_t *restrict thread, const pthread_attr_t *restrict attr, void *(*start_routine)(void *), void *restrict arg); +int pthread_attr_destroy(pthread_attr_t *attr); +int pthread_attr_init(pthread_attr_t *attr); +int pthread_attr_getstacksize(const pthread_attr_t *restrict attr, size_t *restrict stacksize); +int pthread_attr_getguardsize(const pthread_attr_t *restrict attr, size_t *restrict guardsize); +int pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize); +int pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize); +typedef union { + int x; +} pthread_mutex_t; +typedef union { + int x; +} pthread_mutexattr_t; +int pthread_mutex_init(pthread_mutex_t *restrict mutex, const pthread_mutexattr_t *restrict attr); +int pthread_mutex_destroy(pthread_mutex_t *mutex); +int pthread_mutex_lock(pthread_mutex_t *mutex); +int pthread_mutex_trylock(pthread_mutex_t *mutex); +int pthread_mutex_unlock(pthread_mutex_t *mutex); + // Must have at least one call expression to initialize the summary map. int bar(void); void foo() { From 7e5dab5fca4b154f12d3a313a6bdbd507f2314be Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Mon, 7 Sep 2020 16:33:55 +0100 Subject: [PATCH 369/465] [flang] Spelling and format edits to README.txt. NFC. --- flang/README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/flang/README.md b/flang/README.md index fafc1f91a421f..3a58c277bacf3 100644 --- a/flang/README.md +++ b/flang/README.md @@ -159,7 +159,7 @@ make test check-all To run individual regression tests llvm-lit needs to know the lit configuration for flang. 
The parameters in charge of this are: -flang_site_config and flang_config. And they can be set as shown bellow: +flang_site_config and flang_config. And they can be set as shown below: ``` /llvm-lit \ --param flang_site_config=/test-lit/lit.site.cfg.py \ @@ -214,9 +214,11 @@ To generate doxygen-style documentation from source code cd ~/llvm-project/build cmake -DLLVM_ENABLE_DOXYGEN=ON -DFLANG_INCLUDE_DOCS=ON ../llvm make doxygen-flang +``` It will generate html in +``` /tools/flang/docs/doxygen/html # for flang docs ``` ## Generate Sphinx-based Documentation @@ -227,17 +229,18 @@ is mostly meant to be processed by the Sphinx documentation generation system to create HTML pages which would be hosted on the webpage of flang and updated periodically. -If you would like to generate and view the HTML locally, install -Sphinx and then: - +If you would like to generate and view the HTML locally: +- Install [Sphinx](http://sphinx-doc.org/), including the [sphinx-markdown-tables](https://pypi.org/project/sphinx-markdown-tables/) extension. - Pass `-DLLVM_ENABLE_SPHINX=ON -DSPHINX_WARNINGS_AS_ERRORS=OFF` to the cmake command. ``` cd ~/llvm-project/build cmake -DLLVM_ENABLE_SPHINX=ON -DSPHINX_WARNINGS_AS_ERRORS=OFF ../llvm make docs-flang-html +``` It will generate html in +``` $BROWSER /tools/flang/docs/html/ ``` From 8248c2af94975912b14e7e0cb414fcbb82c77123 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Mon, 7 Sep 2020 17:15:15 +0200 Subject: [PATCH 370/465] [analyzer][StdLibraryFunctionsChecker] Have proper weak dependencies We want the generice StdLibraryFunctionsChecker to report only if there are no specific checkers that would handle the argument constraint for a function. Note, the assumptions are still evaluated, even if the arguement constraint checker is set to not report. This means that the assumptions made in the generic StdLibraryFunctionsChecker should be an over-approximation of the assumptions made in the specific checkers. 
But most importantly, the assumptions should not contradict. Differential Revision: https://reviews.llvm.org/D87240 --- .../clang/StaticAnalyzer/Checkers/Checkers.td | 3 +- .../test/Analysis/analyzer-enabled-checkers.c | 2 +- ...c-library-functions-arg-enabled-checkers.c | 66 +++++++++++++++++++ .../std-c-library-functions-arg-weakdeps.c | 64 ++++++++++++++++++ 4 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c create mode 100644 clang/test/Analysis/std-c-library-functions-arg-weakdeps.c diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index a444843c50060..a61af45231348 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -349,7 +349,6 @@ let ParentPackage = APIModeling in { def StdCLibraryFunctionsChecker : Checker<"StdCLibraryFunctions">, HelpText<"Improve modeling of the C standard library functions">, - Dependencies<[CallAndMessageModeling]>, CheckerOptions<[ CmdLineOption, "such as whether the parameter of isalpha is in the range [0, 255] " "or is EOF.">, Dependencies<[StdCLibraryFunctionsChecker]>, - WeakDependencies<[NonNullParamChecker]>, + WeakDependencies<[CallAndMessageChecker, NonNullParamChecker, StreamChecker]>, Documentation; } // end "alpha.unix" diff --git a/clang/test/Analysis/analyzer-enabled-checkers.c b/clang/test/Analysis/analyzer-enabled-checkers.c index 7c00e78c16acd..bef786a1a59b6 100644 --- a/clang/test/Analysis/analyzer-enabled-checkers.c +++ b/clang/test/Analysis/analyzer-enabled-checkers.c @@ -6,11 +6,11 @@ // CHECK: OVERVIEW: Clang Static Analyzer Enabled Checkers List // CHECK-EMPTY: -// CHECK-NEXT: core.CallAndMessageModeling // CHECK-NEXT: apiModeling.StdCLibraryFunctions // CHECK-NEXT: apiModeling.TrustNonnull // CHECK-NEXT: apiModeling.llvm.CastValue // CHECK-NEXT: 
apiModeling.llvm.ReturnValue +// CHECK-NEXT: core.CallAndMessageModeling // CHECK-NEXT: core.CallAndMessage // CHECK-NEXT: core.DivideZero // CHECK-NEXT: core.DynamicTypePropagation diff --git a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c new file mode 100644 index 0000000000000..9ad1be0538517 --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c @@ -0,0 +1,66 @@ +// Here we test the order of the Checkers when StdCLibraryFunctionArgs is +// enabled. + +// RUN: %clang --analyze %s --target=x86_64-pc-linux-gnu \ +// RUN: -Xclang -analyzer-checker=core \ +// RUN: -Xclang -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -Xclang -analyzer-config \ +// RUN: -Xclang apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -Xclang -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -Xclang -analyzer-checker=alpha.unix.Stream \ +// RUN: -Xclang -analyzer-list-enabled-checkers \ +// RUN: -Xclang -analyzer-display-progress \ +// RUN: 2>&1 | FileCheck %s --implicit-check-not=ANALYZE \ +// RUN: --implicit-check-not=\. 
+ +// CHECK: OVERVIEW: Clang Static Analyzer Enabled Checkers List +// CHECK-EMPTY: +// CHECK-NEXT: core.CallAndMessageModeling +// CHECK-NEXT: core.CallAndMessage +// CHECK-NEXT: core.NonNullParamChecker +// CHECK-NEXT: alpha.unix.Stream +// CHECK-NEXT: apiModeling.StdCLibraryFunctions +// CHECK-NEXT: alpha.unix.StdCLibraryFunctionArgs +// CHECK-NEXT: apiModeling.TrustNonnull +// CHECK-NEXT: apiModeling.llvm.CastValue +// CHECK-NEXT: apiModeling.llvm.ReturnValue +// CHECK-NEXT: core.DivideZero +// CHECK-NEXT: core.DynamicTypePropagation +// CHECK-NEXT: core.NonnilStringConstants +// CHECK-NEXT: core.NullDereference +// CHECK-NEXT: core.StackAddrEscapeBase +// CHECK-NEXT: core.StackAddressEscape +// CHECK-NEXT: core.UndefinedBinaryOperatorResult +// CHECK-NEXT: core.VLASize +// CHECK-NEXT: core.builtin.BuiltinFunctions +// CHECK-NEXT: core.builtin.NoReturnFunctions +// CHECK-NEXT: core.uninitialized.ArraySubscript +// CHECK-NEXT: core.uninitialized.Assign +// CHECK-NEXT: core.uninitialized.Branch +// CHECK-NEXT: core.uninitialized.CapturedBlockVariable +// CHECK-NEXT: core.uninitialized.UndefReturn +// CHECK-NEXT: deadcode.DeadStores +// CHECK-NEXT: nullability.NullabilityBase +// CHECK-NEXT: nullability.NullPassedToNonnull +// CHECK-NEXT: nullability.NullReturnedFromNonnull +// CHECK-NEXT: security.insecureAPI.SecuritySyntaxChecker +// CHECK-NEXT: security.insecureAPI.UncheckedReturn +// CHECK-NEXT: security.insecureAPI.getpw +// CHECK-NEXT: security.insecureAPI.gets +// CHECK-NEXT: security.insecureAPI.mkstemp +// CHECK-NEXT: security.insecureAPI.mktemp +// CHECK-NEXT: security.insecureAPI.vfork +// CHECK-NEXT: unix.API +// CHECK-NEXT: unix.cstring.CStringModeling +// CHECK-NEXT: unix.DynamicMemoryModeling +// CHECK-NEXT: unix.Malloc +// CHECK-NEXT: unix.MallocSizeof +// CHECK-NEXT: unix.MismatchedDeallocator +// CHECK-NEXT: unix.Vfork +// CHECK-NEXT: unix.cstring.BadSizeArg +// CHECK-NEXT: unix.cstring.NullArg + +int main() { + int i; + (void)(10 / i); +} diff 
--git a/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c new file mode 100644 index 0000000000000..0ad3c277dfd7d --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c @@ -0,0 +1,64 @@ +// Check that the more specific checkers report and not the generic +// StdCLibraryFunctionArgs checker. + +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -analyzer-checker=alpha.unix.Stream \ +// RUN: -triple x86_64-unknown-linux-gnu \ +// RUN: -verify + + +// Make sure that all used functions have their summary loaded. + +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -analyzer-checker=alpha.unix.Stream \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -triple x86_64-unknown-linux 2>&1 | FileCheck %s + +// CHECK: Loaded summary for: int isalnum(int) +// CHECK: Loaded summary for: unsigned long fread(void *restrict, size_t, size_t, FILE *restrict) __attribute__((nonnull(1))) +// CHECK: Loaded summary for: int fileno(FILE *stream) + +void initializeSummaryMap(); +// We analyze this function first, and the call expression inside initializes +// the summary map. This way we force the loading of the summaries. The +// summaries would not be loaded without this because during the first bug +// report in WeakDependency::checkPreCall we stop further evaluation. And +// StdLibraryFunctionsChecker lazily initializes its summary map from its +// checkPreCall. 
+void analyzeThisFirst() { + initializeSummaryMap(); +} + +typedef __typeof(sizeof(int)) size_t; +struct FILE; +typedef struct FILE FILE; + +int isalnum(int); +size_t fread(void *restrict, size_t, size_t, FILE *restrict) __attribute__((nonnull(1))); +int fileno(FILE *stream); + +void test_uninit_arg() { + int v; + int r = isalnum(v); // \ + // expected-warning{{1st function call argument is an uninitialized value [core.CallAndMessage]}} + (void)r; +} + +void test_notnull_arg(FILE *F) { + int *p = 0; + fread(p, sizeof(int), 5, F); // \ + expected-warning{{Null pointer passed to 1st parameter expecting 'nonnull' [core.NonNullParamChecker]}} +} + +void test_notnull_stream_arg() { + fileno(0); // \ + // expected-warning{{Stream pointer might be NULL [alpha.unix.Stream]}} +} From eb482afaf5bbf3abf9d02c3810e418945c68a936 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 7 Sep 2020 16:16:52 +0100 Subject: [PATCH 371/465] Reduce the number of memory allocations when displaying a warning about clobbering reserved registers (NFC). Also address some minor inefficiencies and style issues. 
Differential Revision: https://reviews.llvm.org/D86088 --- .../AsmPrinter/AsmPrinterInlineAsm.cpp | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 538107cecd8b3..57bf500ba8923 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -547,22 +548,23 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS); // Emit warnings if we use reserved registers on the clobber list, as - // that might give surprising results. - std::vector RestrRegs; + // that might lead to undefined behaviour. + SmallVector RestrRegs; + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // Start with the first operand descriptor, and iterate over them. for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands(); I < NumOps; ++I) { const MachineOperand &MO = MI->getOperand(I); - if (MO.isImm()) { - unsigned Flags = MO.getImm(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber && - !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) { - RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg())); - } - // Skip to one before the next operand descriptor, if it exists. 
- I += InlineAsm::getNumOperandRegisters(Flags); + if (!MO.isImm()) + continue; + unsigned Flags = MO.getImm(); + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber) { + Register Reg = MI->getOperand(I + 1).getReg(); + if (!TRI->isAsmClobberable(*MF, Reg)) + RestrRegs.push_back(Reg); } + // Skip to one before the next operand descriptor, if it exists. + I += InlineAsm::getNumOperandRegisters(Flags); } if (!RestrRegs.empty()) { @@ -572,14 +574,15 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin()); std::string Msg = "inline asm clobber list contains reserved registers: "; - for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) { + for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; ++I) { if(I != RestrRegs.begin()) Msg += ", "; - Msg += *I; + Msg += TRI->getName(*I); } - std::string Note = "Reserved registers on the clobber list may not be " - "preserved across the asm statement, and clobbering them may " - "lead to undefined behaviour."; + const char *Note = + "Reserved registers on the clobber list may not be " + "preserved across the asm statement, and clobbering them may " + "lead to undefined behaviour."; SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg); SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note); } From 2480a31e5d69a5c2e8e900be3a7f706d77f5a5cc Mon Sep 17 00:00:00 2001 From: alex-t Date: Mon, 7 Sep 2020 18:57:27 +0300 Subject: [PATCH 372/465] [AMDGPU] SILowerControlFlow::optimizeEndCF should remove empty basic block optimizeEndCF removes EXEC restoring instruction case this instruction is the only one except the branch to the single successor and that successor contains EXEC mask restoring instruction that was lowered from END_CF belonging to IF_ELSE. As a result of such optimization we get the basic block with the only one instruction that is a branch to the single successor. 
In case the control flow can reach such an empty block from S_CBRANCH_EXEZ/EXECNZ it might happen that spill/reload instructions that were inserted later by register allocator are placed under exec == 0 condition and never execute. Removing empty block solves the problem. This change requires further work to re-implement LIS updates. Currently, LIS is always nullptr in this pass. To enable it we need another patch to fix many places across the codegen. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D86634 --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 47 +++++++++++++++++-- llvm/test/CodeGen/AMDGPU/collapse-endcf.mir | 32 +++++-------- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 0246c6508e9f2..914668f2b68a2 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -113,6 +113,8 @@ class SILowerControlFlow : public MachineFunctionPass { void combineMasks(MachineInstr &MI); + bool removeMBBifRedundant(MachineBasicBlock &MBB); + void process(MachineInstr &MI); // Skip to the next instruction, ignoring debug instructions, and trivial @@ -154,9 +156,6 @@ class SILowerControlFlow : public MachineFunctionPass { AU.addPreserved(); AU.addPreserved(); AU.addPreservedID(LiveVariablesID); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -604,6 +603,7 @@ void SILowerControlFlow::optimizeEndCf() { if (LIS) LIS->RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); + removeMBBifRedundant(MBB); } } } @@ -658,6 +658,47 @@ void SILowerControlFlow::process(MachineInstr &MI) { } } +bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) { + bool Redundant = true; + for (auto &I : MBB.instrs()) { + if (!I.isDebugInstr() && !I.isUnconditionalBranch()) + Redundant =
false; + } + if (Redundant) { + MachineBasicBlock *Succ = *MBB.succ_begin(); + SmallVector Preds(MBB.predecessors()); + for (auto P : Preds) { + P->replaceSuccessor(&MBB, Succ); + MachineBasicBlock::iterator I(P->getFirstInstrTerminator()); + while (I != P->end()) { + if (I->isBranch()) { + if (TII->getBranchDestBlock(*I) == &MBB) { + I->getOperand(0).setMBB(Succ); + break; + } + } + I++; + } + if (I == P->end()) { + MachineFunction *MF = P->getParent(); + MachineFunction::iterator InsertPt = + P->getNextNode() ? MachineFunction::iterator(P->getNextNode()) + : MF->end(); + MF->splice(InsertPt, Succ); + } + } + MBB.removeSuccessor(Succ); + if (LIS) { + for (auto &I : MBB.instrs()) + LIS->RemoveMachineInstrFromMaps(I); + } + MBB.clear(); + MBB.eraseFromParent(); + return true; + } + return false; +} + bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir index d50973c9abf99..e87f1e7dc8dd0 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -16,16 +16,13 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) - ; GCN: DBG_VALUE ; GCN: bb.4: ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc ; GCN: DBG_VALUE @@ -68,14 +65,12 @@ 
body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) @@ -118,14 +113,12 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) @@ -387,22 +380,19 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.5, implicit 
$exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.5(0x80000000) - ; GCN: S_BRANCH %bb.5 - ; GCN: bb.4: - ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc - ; GCN: S_ENDPGM 0 ; GCN: bb.5: ; GCN: successors: %bb.4(0x80000000) ; GCN: S_BRANCH %bb.4 + ; GCN: bb.4: + ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 From 11d8eedfa5b796a9ba0276a5e4bad8b9e549f0b6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 11:40:59 -0400 Subject: [PATCH 373/465] [InstCombine] move/add tests for icmp with mul operands; NFC --- llvm/test/Transforms/InstCombine/icmp-mul.ll | 311 +++++++++++++++++++ llvm/test/Transforms/InstCombine/icmp.ll | 52 ---- 2 files changed, 311 insertions(+), 52 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/icmp-mul.ll b/llvm/test/Transforms/InstCombine/icmp-mul.ll index 8e7d9056726e4..719150054015e 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul.ll @@ -365,3 +365,314 @@ define i1 @ne_rem_zero_nonuw(i8 %x) { %b = icmp ne i8 %a, 30 ret i1 %b } + +define i1 @mul_constant_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_eq( +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 5 + %B = mul i32 %y, 5 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_splat( +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul <2 x i32> %x, + %B = mul <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 +; CHECK-NEXT: [[C:%.*]] 
= icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + call void @use(i8 %A) + %B = mul i8 %y, 5 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_extra_use2( +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 +; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + %B = mul i8 %y, 5 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + call void @use(i8 %A) + %B = mul i8 %y, 5 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nsw(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_eq_nsw( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i32 %x, 6 + %B = mul nsw i32 %y, 6 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_nsw_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_splat( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul nsw <2 x i32> %x, + %B = mul nsw <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_nsw_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: 
@mul_constant_ne_nsw_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 74 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 74 +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 74 + call void @use(i8 %A) + %B = mul nsw i8 %y, 74 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nsw_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_nsw_extra_use2( +; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 20 +; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 20 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 20 + %B = mul nsw i8 %y, 20 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_nsw_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 24 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 24 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 24 + call void @use(i8 %A) + %B = mul nsw i8 %y, 24 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_nuw_eq( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 22 + %B = mul nuw i32 %y, 22 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_nuw_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_splat( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x 
i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul nuw <2 x i32> %x, + %B = mul nuw <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_nuw_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 6 +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 6 + call void @use(i8 %A) + %B = mul nuw i8 %y, 6 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nuw_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_nuw_extra_use2( +; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 36 +; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 36 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 36 + %B = mul nuw i8 %y, 36 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_nuw_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 38 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 38 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 38 + call void @use(i8 %A) + %B = mul nuw i8 %y, 38 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ult(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_ult( +; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], 47 +; CHECK-NEXT: [[B:%.*]] = mul i32 [[Y:%.*]], 47 +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 47 + %B = mul i32 %y, 47 + %C = icmp ult i32 %A, %B + ret i1 %C +} + +define i1 @mul_constant_nuw_sgt(i32 %x, i32 %y) { +; 
CHECK-LABEL: @mul_constant_nuw_sgt( +; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 +; CHECK-NEXT: [[B:%.*]] = mul nuw i32 [[Y:%.*]], 46 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 46 + %B = mul nuw i32 %y, 46 + %C = icmp sgt i32 %A, %B + ret i1 %C +} + +define i1 @mul_mismatch_constant_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_mismatch_constant_nuw_eq( +; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 +; CHECK-NEXT: [[B:%.*]] = mul nuw i32 [[Y:%.*]], 44 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 46 + %B = mul nuw i32 %y, 44 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +; If the multiply constant has any trailing zero bits but could overflow, +; we get something completely different. +; We mask off the high bits of each input and then convert: +; (X&Z) == (Y&Z) -> (X^Y) & Z == 0 + +define i1 @mul_constant_partial_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_partial_nuw_eq( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 44 + %B = mul nuw i32 %y, 44 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define i1 @mul_constant_mismatch_wrap_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_mismatch_wrap_eq( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i32 %x, 54 + %B = mul nuw i32 %y, 54 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { +; CHECK-LABEL: @eq_mul_constants_with_tz( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 +; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0 +; 
CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 12 + %B = mul i32 %y, 12 + %C = icmp ne i32 %A, %B + ret i1 %C +} + +define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @eq_mul_constants_with_tz_splat( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul <2 x i32> %x, + %B = mul <2 x i32> %y, + %C = icmp eq <2 x i32> %A, %B + ret <2 x i1> %C +} diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index a9bda13e15b90..683518121789c 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -3397,58 +3397,6 @@ define i1 @eq_add_constants(i32 %x, i32 %y) { ret i1 %C } -define i1 @eq_mul_constants(i32 %x, i32 %y) { -; CHECK-LABEL: @eq_mul_constants( -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[C]] -; - %A = mul i32 %x, 5 - %B = mul i32 %y, 5 - %C = icmp eq i32 %A, %B - ret i1 %C -} - -define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @eq_mul_constants_splat( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret <2 x i1> [[C]] -; - %A = mul <2 x i32> %x, - %B = mul <2 x i32> %y, - %C = icmp ne <2 x i32> %A, %B - ret <2 x i1> %C -} - -; If the multiply constant has any trailing zero bits, we get something completely different. 
-; We mask off the high bits of each input and then convert: -; (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - -define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { -; CHECK-LABEL: @eq_mul_constants_with_tz( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK-NEXT: ret i1 [[C]] -; - %A = mul i32 %x, 12 - %B = mul i32 %y, 12 - %C = icmp ne i32 %A, %B - ret i1 %C -} - -define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @eq_mul_constants_with_tz_splat( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[C]] -; - %A = mul <2 x i32> %x, - %B = mul <2 x i32> %y, - %C = icmp eq <2 x i32> %A, %B - ret <2 x i1> %C -} - declare i32 @llvm.bswap.i32(i32) define i1 @bswap_ne(i32 %x, i32 %y) { From 7a6d6f0f7046f6ebcbf06eaf8f996d991a90e440 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 12:37:59 -0400 Subject: [PATCH 374/465] [InstCombine] improve folds for icmp with multiply operands (PR47432) Check for no overflow along with an odd constant before we lose information by converting to bitwise logic. 
https://rise4fun.com/Alive/2Xl Pre: C1 != 0 %mx = mul nsw i8 %x, C1 %my = mul nsw i8 %y, C1 %r = icmp eq i8 %mx, %my => %r = icmp eq i8 %x, %y Name: nuw ne Pre: C1 != 0 %mx = mul nuw i8 %x, C1 %my = mul nuw i8 %y, C1 %r = icmp ne i8 %mx, %my => %r = icmp ne i8 %x, %y Name: odd ne Pre: C1 % 2 != 0 %mx = mul i8 %x, C1 %my = mul i8 %y, C1 %r = icmp ne i8 %mx, %my => %r = icmp ne i8 %x, %y --- .../InstCombine/InstCombineCompares.cpp | 17 +++++-- llvm/test/Transforms/InstCombine/icmp-mul.ll | 46 ++++++++----------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 350d00095c6f1..608017b6dca25 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3983,6 +3983,19 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, ConstantExpr::getNeg(RHSC)); } + { + // Try to remove shared constant multiplier from equality comparison: + // X * C == Y * C (with no overflowing/aliasing) --> X == Y + Value *X, *Y; + const APInt *C; + if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 && + match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality()) + if (!C->countTrailingZeros() || + (BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) || + (BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap())) + return new ICmpInst(Pred, X, Y); + } + BinaryOperator *SRem = nullptr; // icmp (srem X, Y), Y if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1)) @@ -4059,10 +4072,6 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask); return new ICmpInst(Pred, And1, And2); } - // If there are no trailing zeros in the multiplier, just eliminate - // the multiplies (no masking is needed): - // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y - return new ICmpInst(Pred, BO0->getOperand(0), 
BO1->getOperand(0)); } break; } diff --git a/llvm/test/Transforms/InstCombine/icmp-mul.ll b/llvm/test/Transforms/InstCombine/icmp-mul.ll index 719150054015e..e2aff1c304adf 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul.ll @@ -392,8 +392,7 @@ define i1 @mul_constant_ne_extra_use1(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_ne_extra_use1( ; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 ; CHECK-NEXT: call void @use(i8 [[A]]) -; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul i8 %x, 5 @@ -405,10 +404,9 @@ define i1 @mul_constant_ne_extra_use1(i8 %x, i8 %y) { define i1 @mul_constant_eq_extra_use2(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_eq_extra_use2( -; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 ; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul i8 %x, 5 @@ -424,7 +422,7 @@ define i1 @mul_constant_ne_extra_use3(i8 %x, i8 %y) { ; CHECK-NEXT: call void @use(i8 [[A]]) ; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul i8 %x, 5 @@ -437,9 +435,7 @@ define i1 @mul_constant_ne_extra_use3(i8 %x, i8 %y) { define i1 @mul_constant_eq_nsw(i32 %x, i32 %y) { ; CHECK-LABEL: @mul_constant_eq_nsw( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nsw i32 %x, 6 @@ -450,9 +446,7 @@ define i1 @mul_constant_eq_nsw(i32 %x, i32 %y) { 
define <2 x i1> @mul_constant_ne_nsw_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @mul_constant_ne_nsw_splat( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = mul nsw <2 x i32> %x, @@ -465,8 +459,7 @@ define i1 @mul_constant_ne_nsw_extra_use1(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_ne_nsw_extra_use1( ; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 74 ; CHECK-NEXT: call void @use(i8 [[A]]) -; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 74 -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nsw i8 %x, 74 @@ -478,10 +471,9 @@ define i1 @mul_constant_ne_nsw_extra_use1(i8 %x, i8 %y) { define i1 @mul_constant_eq_nsw_extra_use2(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_eq_nsw_extra_use2( -; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 20 ; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 20 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nsw i8 %x, 20 @@ -497,7 +489,7 @@ define i1 @mul_constant_ne_nsw_extra_use3(i8 %x, i8 %y) { ; CHECK-NEXT: call void @use(i8 [[A]]) ; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 24 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nsw i8 %x, 24 @@ -510,9 +502,7 @@ define i1 @mul_constant_ne_nsw_extra_use3(i8 %x, i8 %y) { define i1 @mul_constant_nuw_eq(i32 %x, i32 %y) { ; CHECK-LABEL: @mul_constant_nuw_eq( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 -; 
CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nuw i32 %x, 22 @@ -523,9 +513,7 @@ define i1 @mul_constant_nuw_eq(i32 %x, i32 %y) { define <2 x i1> @mul_constant_ne_nuw_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @mul_constant_ne_nuw_splat( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = mul nuw <2 x i32> %x, @@ -538,8 +526,7 @@ define i1 @mul_constant_ne_nuw_extra_use1(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_ne_nuw_extra_use1( ; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 6 ; CHECK-NEXT: call void @use(i8 [[A]]) -; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 6 -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nuw i8 %x, 6 @@ -551,10 +538,9 @@ define i1 @mul_constant_ne_nuw_extra_use1(i8 %x, i8 %y) { define i1 @mul_constant_eq_nuw_extra_use2(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_eq_nuw_extra_use2( -; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 36 ; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 36 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nuw i8 %x, 36 @@ -570,7 +556,7 @@ define i1 @mul_constant_ne_nuw_extra_use3(i8 %x, i8 %y) { ; CHECK-NEXT: call void @use(i8 [[A]]) ; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 38 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nuw i8 %x, 38 @@ -581,6 +567,8 @@ define i1 @mul_constant_ne_nuw_extra_use3(i8 
%x, i8 %y) { ret i1 %C } +; Negative test - wrong pred + define i1 @mul_constant_ult(i32 %x, i32 %y) { ; CHECK-LABEL: @mul_constant_ult( ; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], 47 @@ -594,6 +582,8 @@ define i1 @mul_constant_ult(i32 %x, i32 %y) { ret i1 %C } +; Negative test - wrong pred + define i1 @mul_constant_nuw_sgt(i32 %x, i32 %y) { ; CHECK-LABEL: @mul_constant_nuw_sgt( ; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 @@ -607,6 +597,8 @@ define i1 @mul_constant_nuw_sgt(i32 %x, i32 %y) { ret i1 %C } +; Negative test - wrong constants + define i1 @mul_mismatch_constant_nuw_eq(i32 %x, i32 %y) { ; CHECK-LABEL: @mul_mismatch_constant_nuw_eq( ; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 From 1c34ac03a2de0e10f95f16526296dcae5166d129 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 16:56:57 +0100 Subject: [PATCH 375/465] LeonPasses.h - remove orphan function declarations. NFCI. The implementations no longer exist. --- llvm/lib/Target/Sparc/LeonPasses.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/Sparc/LeonPasses.h b/llvm/lib/Target/Sparc/LeonPasses.h index b165bc93780f6..6baf9fddbe508 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.h +++ b/llvm/lib/Target/Sparc/LeonPasses.h @@ -33,13 +33,11 @@ class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass protected: LEONMachineFunctionPass(char &ID); - int GetRegIndexForOperand(MachineInstr &MI, int OperandIndex); void clearUsedRegisterList() { UsedRegisters.clear(); } void markRegisterUsed(int registerIndex) { UsedRegisters.push_back(registerIndex); } - int getUnusedFPRegister(MachineRegisterInfo &MRI); }; class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass { From dfc333050b544173741b66f27872cebb2b7ab983 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 17:09:42 +0100 Subject: [PATCH 376/465] LeonPasses.h - remove unnecessary includes. NFCI. Reduce to forward declarations and move includes to LeonPasses.cpp where necessary. 
--- llvm/lib/Target/Sparc/LeonPasses.cpp | 7 +++---- llvm/lib/Target/Sparc/LeonPasses.h | 7 ++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/Sparc/LeonPasses.cpp b/llvm/lib/Target/Sparc/LeonPasses.cpp index e9d3aaeb9cfe2..6ad6940c6b51b 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.cpp +++ b/llvm/lib/Target/Sparc/LeonPasses.cpp @@ -10,14 +10,13 @@ //===----------------------------------------------------------------------===// #include "LeonPasses.h" -#include "llvm/CodeGen/ISDOpcodes.h" +#include "SparcSubtarget.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID) diff --git a/llvm/lib/Target/Sparc/LeonPasses.h b/llvm/lib/Target/Sparc/LeonPasses.h index 6baf9fddbe508..9bc4569a12984 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.h +++ b/llvm/lib/Target/Sparc/LeonPasses.h @@ -12,14 +12,11 @@ #ifndef LLVM_LIB_TARGET_SPARC_LEON_PASSES_H #define LLVM_LIB_TARGET_SPARC_LEON_PASSES_H -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" - -#include "Sparc.h" -#include "SparcSubtarget.h" namespace llvm { +class SparcSubtarget; + class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass : public MachineFunctionPass { protected: From 95ca3aacf0f82955e9d259484b886c260337285c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 17:50:58 +0100 Subject: [PATCH 377/465] BTFDebug.h - reduce MachineInstr.h include to forward declaration. NFCI. 
--- llvm/lib/Target/BPF/BTFDebug.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index db5b5633f6d90..1bad0d11fee4b 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -16,7 +16,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/DebugHandlerBase.h" -#include "llvm/CodeGen/MachineInstr.h" +#include +#include #include #include #include "BTF.h" @@ -27,9 +28,12 @@ class AsmPrinter; class BTFDebug; class DIType; class GlobalVariable; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class MCInst; class MCStreamer; class MCSymbol; -class MachineFunction; /// The base class for BTF type generation. class BTFTypeBase { From 4e89a0ab02148c71d5be076e0d7262e93010006b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 18:15:26 +0100 Subject: [PATCH 378/465] MipsISelLowering.h - remove CCState/CCValAssign forward declarations. NFCI. These are already defined in the CallingConvLower.h include. --- llvm/lib/Target/Mips/MipsISelLowering.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 0c5df4ba1bade..03933d8205766 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -40,8 +40,6 @@ namespace llvm { class Argument; -class CCState; -class CCValAssign; class FastISel; class FunctionLoweringInfo; class MachineBasicBlock; From 5ea9e655efdd1188d9864a6c97a7a9b772559ff5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 18:35:06 +0100 Subject: [PATCH 379/465] VPlan.h - remove unnecessary forward declarations. NFCI. Already defined in includes. 
--- llvm/lib/Transforms/Vectorize/VPlan.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 078b2ba1c70ac..9c9e2ec8222d1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -51,14 +51,12 @@ namespace llvm { class BasicBlock; class DominatorTree; class InnerLoopVectorizer; -template class InterleaveGroup; class LoopInfo; class raw_ostream; class RecurrenceDescriptor; class Value; class VPBasicBlock; class VPRegionBlock; -class VPSlotTracker; class VPlan; class VPlanSlp; From e52e7ad54defa3a95040b680beff2824c9c6fbb7 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 7 Sep 2020 13:57:06 -0400 Subject: [PATCH 380/465] [ms] [llvm-ml] Add support for bitwise named operators (AND, NOT, OR) in MASM Add support for expressions of the form '1 or 2', etc. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D86944 --- llvm/lib/MC/MCParser/MasmParser.cpp | 35 +++++++++++++------ .../llvm-ml/named_bitwise_operators.test | 20 +++++++++++ 2 files changed, 44 insertions(+), 11 deletions(-) create mode 100644 llvm/test/tools/llvm-ml/named_bitwise_operators.test diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 45165ffe3cac0..94cef83bc405e 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -1314,7 +1314,7 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { /// primaryexpr ::= symbol /// primaryexpr ::= number /// primaryexpr ::= '.' 
-/// primaryexpr ::= ~,+,- primaryexpr +/// primaryexpr ::= ~,+,-,'not' primaryexpr bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { SMLoc FirstTokenLoc = getLexer().getLoc(); AsmToken::TokenKind FirstTokenKind = Lexer.getKind(); @@ -1352,6 +1352,13 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return Error(FirstTokenLoc, "invalid token in expression"); } } + // Parse named bitwise negation. + if (Identifier.equals_lower("not")) { + if (parsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); + return false; + } // Parse symbol variant. std::pair Split; if (!MAI.useParensForSymbolVariant()) { @@ -1772,8 +1779,18 @@ bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc) { SMLoc StartLoc = Lexer.getLoc(); while (true) { + AsmToken::TokenKind TokKind = Lexer.getKind(); + if (Lexer.getKind() == AsmToken::Identifier) { + StringRef Identifier = Lexer.getTok().getString(); + if (Identifier.equals_lower("and")) + TokKind = AsmToken::Amp; + else if (Identifier.equals_lower("not")) + TokKind = AsmToken::Exclaim; + else if (Identifier.equals_lower("or")) + TokKind = AsmToken::Pipe; + } MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; - unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); + unsigned TokPrec = getBinOpPrecedence(TokKind, Kind); // If the next token is lower precedence than we are allowed to eat, return // successfully with what we ate already. @@ -3229,7 +3246,7 @@ bool MasmParser::parseScalarInitializer(unsigned Size, Lex(); } else { const MCExpr *Value; - if (checkForValidSection() || parseExpression(Value)) + if (parseExpression(Value)) return true; if (getTok().is(AsmToken::Identifier) && getTok().getString().equals_lower("dup")) { @@ -3449,6 +3466,9 @@ bool MasmParser::parseRealInstList(const fltSemantics &Semantics, // Initialize real data values. 
bool MasmParser::emitRealValues(const fltSemantics &Semantics) { + if (checkForValidSection()) + return true; + SmallVector ValuesAsInt; if (parseRealInstList(Semantics, ValuesAsInt)) return true; @@ -3468,8 +3488,7 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { Field.SizeOf = 0; - if (checkForValidSection() || - parseRealInstList(Semantics, RealInfo.AsIntValues)) + if (parseRealInstList(Semantics, RealInfo.AsIntValues)) return true; Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8; @@ -3486,9 +3505,6 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { /// ::= (real4 | real8) [ expression (, expression)* ] bool MasmParser::parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics) { - if (checkForValidSection()) - return true; - if (StructInProgress.empty()) { // Initialize data value. if (emitRealValues(Semantics)) @@ -3504,9 +3520,6 @@ bool MasmParser::parseDirectiveRealValue(StringRef IDVal, bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal, const fltSemantics &Semantics, StringRef Name, SMLoc NameLoc) { - if (checkForValidSection()) - return true; - if (StructInProgress.empty()) { // Initialize named data value. 
MCSymbol *Sym = getContext().getOrCreateSymbol(Name); diff --git a/llvm/test/tools/llvm-ml/named_bitwise_operators.test b/llvm/test/tools/llvm-ml/named_bitwise_operators.test new file mode 100644 index 0000000000000..f122dbe842d0f --- /dev/null +++ b/llvm/test/tools/llvm-ml/named_bitwise_operators.test @@ -0,0 +1,20 @@ +; RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data + +t1 BYTE NOT 1 +; CHECK: t1: +; CHECK-NEXT: .byte -2 + +t2 BYTE 1 OR 2 +; CHECK: t2: +; CHECK-NEXT: .byte 3 + +t3 BYTE 6 AND 10 +; CHECK: t3: +; CHECK-NEXT: .byte 2 + +.code +xor eax, eax + +END From 2feb6e9b8418b29c002bc830a3e2fdcbe9e39449 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 7 Sep 2020 13:58:55 -0400 Subject: [PATCH 381/465] [ms] [llvm-ml] Fix STRUCT field alignment MASM aligns fields to the _minimum_ of the STRUCT alignment value and the size of the next field. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D86945 --- llvm/lib/MC/MCParser/MasmParser.cpp | 48 ++++++++++++++++------------- llvm/test/tools/llvm-ml/struct.test | 32 +++++++++---------- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 94cef83bc405e..333eef2f698fd 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -127,7 +127,7 @@ struct StructInfo { std::vector Fields; StringMap FieldsByName; - FieldInfo &addField(StringRef FieldName, FieldType FT); + FieldInfo &addField(StringRef FieldName, FieldType FT, size_t FieldSize); StructInfo() = default; @@ -330,7 +330,8 @@ struct FieldInfo { FieldInfo(FieldType FT) : Contents(FT) {} }; -FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT) { +FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, + size_t FieldSize) { if (!FieldName.empty()) FieldsByName[FieldName] = Fields.size(); Fields.emplace_back(FT); @@ -338,7 +339,7 @@ FieldInfo &StructInfo::addField(StringRef FieldName, FieldType 
FT) { if (IsUnion) { Field.Offset = 0; } else { - Size = llvm::alignTo(Size, Alignment); + Size = llvm::alignTo(Size, std::min(Alignment, FieldSize)); Field.Offset = Size; } return Field; @@ -759,13 +760,14 @@ class MasmParser : public MCAsmParser { // "real4", "real8" bool emitRealValues(const fltSemantics &Semantics); - bool addRealField(StringRef Name, const fltSemantics &Semantics); - bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics); + bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size); + bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics, + size_t Size); bool parseRealInstList( const fltSemantics &Semantics, SmallVectorImpl &Values, const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); bool parseDirectiveNamedRealValue(StringRef IDVal, - const fltSemantics &Semantics, + const fltSemantics &Semantics, size_t Size, StringRef Name, SMLoc NameLoc); bool parseOptionalAngleBracketOpen(); @@ -2118,9 +2120,9 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, case DK_DQ: return parseDirectiveValue(IDVal, 8); case DK_REAL4: - return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle()); + return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4); case DK_REAL8: - return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble()); + return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8); case DK_STRUCT: case DK_UNION: return parseDirectiveNestedStruct(IDVal, DirKind); @@ -2343,12 +2345,12 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc); case DK_REAL4: Lex(); - return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), IDVal, - IDLoc); + return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4, + IDVal, IDLoc); case DK_REAL8: Lex(); - return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal, - IDLoc); + return parseDirectiveNamedRealValue(nextVal, 
APFloat::IEEEdouble(), 8, + IDVal, IDLoc); case DK_STRUCT: case DK_UNION: Lex(); @@ -3306,7 +3308,7 @@ bool MasmParser::emitIntegralValues(unsigned Size) { // Add a field to the current structure. bool MasmParser::addIntegralField(StringRef Name, unsigned Size) { StructInfo &Struct = StructInProgress.back(); - FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL); + FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size); IntFieldInfo &IntInfo = Field.Contents.IntInfo; Field.Type = Size; @@ -3481,9 +3483,10 @@ bool MasmParser::emitRealValues(const fltSemantics &Semantics) { } // Add a real field to the current struct. -bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { +bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics, + size_t Size) { StructInfo &Struct = StructInProgress.back(); - FieldInfo &Field = Struct.addField(Name, FT_REAL); + FieldInfo &Field = Struct.addField(Name, FT_REAL, Size); RealFieldInfo &RealInfo = Field.Contents.RealInfo; Field.SizeOf = 0; @@ -3504,12 +3507,13 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { /// parseDirectiveRealValue /// ::= (real4 | real8) [ expression (, expression)* ] bool MasmParser::parseDirectiveRealValue(StringRef IDVal, - const fltSemantics &Semantics) { + const fltSemantics &Semantics, + size_t Size) { if (StructInProgress.empty()) { // Initialize data value. 
if (emitRealValues(Semantics)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); - } else if (addRealField("", Semantics)) { + } else if (addRealField("", Semantics, Size)) { return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); } return false; @@ -3519,14 +3523,15 @@ bool MasmParser::parseDirectiveRealValue(StringRef IDVal, /// ::= name (real4 | real8) [ expression (, expression)* ] bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal, const fltSemantics &Semantics, - StringRef Name, SMLoc NameLoc) { + size_t Size, StringRef Name, + SMLoc NameLoc) { if (StructInProgress.empty()) { // Initialize named data value. MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitLabel(Sym); if (emitRealValues(Semantics)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); - } else if (addRealField(Name, Semantics)) { + } else if (addRealField(Name, Semantics, Size)) { return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); } return false; @@ -3956,7 +3961,7 @@ bool MasmParser::emitStructValues(const StructInfo &Structure) { // Declare a field in the current struct. 
bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) { StructInfo &OwningStruct = StructInProgress.back(); - FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT); + FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT, Structure.Size); StructFieldInfo &StructInfo = Field.Contents.StructInfo; StructInfo.Structure = Structure; @@ -4130,7 +4135,8 @@ bool MasmParser::parseDirectiveNestedEnds() { else ParentStruct.Size += Structure.Size; } else { - FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT); + FieldInfo &Field = + ParentStruct.addField(Structure.Name, FT_STRUCT, Structure.Size); StructFieldInfo &StructInfo = Field.Contents.StructInfo; Field.Type = Structure.Size; Field.LengthOf = 1; diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test index fa85ecd455dda..38fc763fc7e1f 100644 --- a/llvm/test/tools/llvm-ml/struct.test +++ b/llvm/test/tools/llvm-ml/struct.test @@ -34,11 +34,9 @@ t1 foobar <> ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 2 ; -; , with internal alignment padding +; , with no alignment padding (field size < alignment) ; CHECK-NEXT: .byte 6 -; CHECK-NEXT: .zero 1 ; CHECK-NEXT: .byte 7 -; CHECK-NEXT: .zero 1 ; ; BYTE "abcde", plus alignment padding ; CHECK-NEXT: .byte 97 @@ -65,11 +63,9 @@ t2 FOOBAR <"gh",,<10,11>,<12>,"ijk"> ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .byte 11 ; -; , with internal alignment padding +; , with no alignment padding (field size < alignment) ; CHECK-NEXT: .byte 12 -; CHECK-NEXT: .zero 1 ; CHECK-NEXT: .byte 7 -; CHECK-NEXT: .zero 1 ; ; BYTE "ijk", padded with " ", plus alignment padding ; CHECK-NEXT: .byte 105 @@ -87,16 +83,16 @@ mov eax, [t2].f.h mov eax, [t2.f.h] ; CHECK: t3: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip 
+ t2+11] t4: mov eax, j.FOOBAR.f.h mov eax, j.baz.b ; CHECK: t4: -; CHECK-NEXT: mov eax, dword ptr [rip + j+12] +; CHECK-NEXT: mov eax, dword ptr [rip + j+11] ; CHECK-NEXT: mov eax, dword ptr [rip + j+1] t5: @@ -105,9 +101,9 @@ mov eax, [ebx.FOOBAR].f.h mov eax, [ebx.FOOBAR.f.h] ; CHECK: t5: -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] t6: mov eax, t2.FOOBAR.f.h @@ -116,10 +112,10 @@ mov eax, [t2.FOOBAR].f.h mov eax, [t2.FOOBAR.f.h] ; CHECK: t6: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] t7: mov eax, [ebx].FOOBAR.e.b @@ -185,7 +181,7 @@ mov eax, FOOBAR.f.h ; CHECK: t10: ; CHECK-NEXT: mov eax, 10 -; CHECK-NEXT: mov eax, 12 +; CHECK-NEXT: mov eax, 11 t11: mov eax, (FOOBAR PTR [ebx]).f From a3ec4a3158f3a60c16ac1e3550667866fe1d4171 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 7 Sep 2020 14:00:05 -0400 Subject: [PATCH 382/465] [ms] [llvm-ml] Allow use of locally-defined variables in expressions MASM allows variables defined by equate statements to be used in expressions. 
Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D86946 --- llvm/lib/MC/MCParser/MasmParser.cpp | 5 +++++ llvm/test/tools/llvm-ml/variable.test | 13 +++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 llvm/test/tools/llvm-ml/variable.test diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 333eef2f698fd..4d62174f7e5e4 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -3076,6 +3076,11 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, SMLoc EndLoc, StartLoc = Lexer.getLoc(); if (parseExpression(Expr, EndLoc)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name); + Sym->setRedefinable(Var.Redefinable); + Sym->setVariableValue(Expr); + Sym->setExternal(false); + if (Expr->evaluateAsAbsolute(Var.NumericValue, getStreamer().getAssemblerPtr())) return false; diff --git a/llvm/test/tools/llvm-ml/variable.test b/llvm/test/tools/llvm-ml/variable.test new file mode 100644 index 0000000000000..4e89d67bd59dd --- /dev/null +++ b/llvm/test/tools/llvm-ml/variable.test @@ -0,0 +1,13 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data +t1_value equ 1 or 2 + +t1 BYTE t1_value DUP (0) +; CHECK: t1: +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 0 +; CHECK-NOT: .byte 0 + +END From 7a06b166b1afb457a7df6ad73a6710b4dde4db68 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 14:11:06 -0400 Subject: [PATCH 383/465] [DAGCombiner] allow more store merging for non-i8 truncated ops This is a follow-up suggested in D86420 - if we have a pair of stores in inverted order for the target endian, we can rotate the source bits into place. The "be_i64_to_i16_order" test shows a limitation of the current function (which might be avoided if we integrate this function with the other cases in mergeConsecutiveStores). 
In the earlier "be_i64_to_i16" test, we skip the first 2 stores because we do not match the full set as consecutive or rotate-able, but then we reach the last 2 stores and see that they are an inverted pair of 16-bit stores. The "be_i64_to_i16_order" test alters the program order of the stores, so we miss matching the sub-pattern. Differential Revision: https://reviews.llvm.org/D87112 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +++++-- .../test/CodeGen/AArch64/merge-trunc-store.ll | 49 +++++++++---------- llvm/test/CodeGen/X86/stores-merging.ll | 22 +++------ 3 files changed, 43 insertions(+), 46 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 286d54386357f..37d8cdd695445 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7011,12 +7011,15 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // Check if the offsets line up for the native data layout of this target. bool NeedBswap = false; + bool NeedRotate = false; if (!checkOffsets(Layout.isLittleEndian())) { // Special-case: check if byte offsets line up for the opposite endian. - // TODO: We could use rotates for 16/32-bit merge pairs. - if (NarrowNumBits != 8 || !checkOffsets(Layout.isBigEndian())) + if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian())) + NeedBswap = true; + else if (NumStores == 2 && checkOffsets(Layout.isBigEndian())) + NeedRotate = true; + else return SDValue(); - NeedBswap = true; } SDLoc DL(N); @@ -7026,11 +7029,16 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue); } - // Before legalize we can introduce illegal bswaps which will be later + // Before legalize we can introduce illegal bswaps/rotates which will be later // converted to an explicit bswap sequence. 
This way we end up with a single // store and byte shuffling instead of several stores and byte shuffling. - if (NeedBswap) + if (NeedBswap) { SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue); + } else if (NeedRotate) { + assert(WideNumBits % 2 == 0 && "Unexpected type for rotate"); + SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT); + SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt); + } SDValue NewStore = DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(), diff --git a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll index 3f8fa3e9e3837..b4c6e7736837a 100644 --- a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll +++ b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll @@ -207,9 +207,8 @@ define void @le_i32_to_i16(i32 %x, i16* %p0) { ; ; BE-LABEL: le_i32_to_i16: ; BE: // %bb.0: -; BE-NEXT: lsr w8, w0, #16 -; BE-NEXT: strh w0, [x1] -; BE-NEXT: strh w8, [x1, #2] +; BE-NEXT: ror w8, w0, #16 +; BE-NEXT: str w8, [x1] ; BE-NEXT: ret %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -228,9 +227,8 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) { ; ; BE-LABEL: le_i32_to_i16_order: ; BE: // %bb.0: -; BE-NEXT: lsr w8, w0, #16 -; BE-NEXT: strh w8, [x1, #2] -; BE-NEXT: strh w0, [x1] +; BE-NEXT: ror w8, w0, #16 +; BE-NEXT: str w8, [x1] ; BE-NEXT: ret %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -244,9 +242,8 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) { define void @be_i32_to_i16(i32 %x, i16* %p0) { ; LE-LABEL: be_i32_to_i16: ; LE: // %bb.0: -; LE-NEXT: lsr w8, w0, #16 -; LE-NEXT: strh w0, [x1, #2] -; LE-NEXT: strh w8, [x1] +; LE-NEXT: ror w8, w0, #16 +; LE-NEXT: str w8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i32_to_i16: @@ -265,9 +262,8 @@ define void @be_i32_to_i16(i32 %x, i16* %p0) { define void @be_i32_to_i16_order(i32 %x, i16* %p0) { ; LE-LABEL: be_i32_to_i16_order: ; LE: // %bb.0: -; LE-NEXT: lsr w8, w0, #16 -; LE-NEXT: strh w8, [x1] -; LE-NEXT: 
strh w0, [x1, #2] +; LE-NEXT: ror w8, w0, #16 +; LE-NEXT: str w8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i32_to_i16_order: @@ -528,13 +524,12 @@ define void @le_i64_to_i16_order(i64 %x, i16* %p0) { define void @be_i64_to_i16(i64 %x, i16* %p0) { ; LE-LABEL: be_i64_to_i16: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #16 -; LE-NEXT: lsr x9, x0, #32 -; LE-NEXT: lsr x10, x0, #48 -; LE-NEXT: strh w0, [x1, #6] -; LE-NEXT: strh w8, [x1, #4] -; LE-NEXT: strh w9, [x1, #2] -; LE-NEXT: strh w10, [x1] +; LE-NEXT: lsr x8, x0, #32 +; LE-NEXT: lsr x9, x0, #48 +; LE-NEXT: ror w10, w0, #16 +; LE-NEXT: str w10, [x1, #4] +; LE-NEXT: strh w8, [x1, #2] +; LE-NEXT: strh w9, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i16: @@ -599,8 +594,8 @@ define void @le_i64_to_i32(i64 %x, i32* %p0) { ; ; BE-LABEL: le_i64_to_i32: ; BE: // %bb.0: -; BE-NEXT: lsr x8, x0, #32 -; BE-NEXT: stp w0, w8, [x1] +; BE-NEXT: ror x8, x0, #32 +; BE-NEXT: str x8, [x1] ; BE-NEXT: ret %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -619,8 +614,8 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) { ; ; BE-LABEL: le_i64_to_i32_order: ; BE: // %bb.0: -; BE-NEXT: lsr x8, x0, #32 -; BE-NEXT: stp w0, w8, [x1] +; BE-NEXT: ror x8, x0, #32 +; BE-NEXT: str x8, [x1] ; BE-NEXT: ret %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -634,8 +629,8 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) { define void @be_i64_to_i32(i64 %x, i32* %p0) { ; LE-LABEL: be_i64_to_i32: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #32 -; LE-NEXT: stp w8, w0, [x1] +; LE-NEXT: ror x8, x0, #32 +; LE-NEXT: str x8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i32: @@ -654,8 +649,8 @@ define void @be_i64_to_i32(i64 %x, i32* %p0) { define void @be_i64_to_i32_order(i64 %x, i32* %p0) { ; LE-LABEL: be_i64_to_i32_order: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #32 -; LE-NEXT: stp w8, w0, [x1] +; LE-NEXT: ror x8, x0, #32 +; LE-NEXT: str x8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i32_order: diff --git a/llvm/test/CodeGen/X86/stores-merging.ll 
b/llvm/test/CodeGen/X86/stores-merging.ll index 6d6796d1c902d..14dd43ed71a46 100644 --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -482,9 +482,8 @@ define void @trunc_i32_to_i16(i32 %x, i16* %p) { define void @be_i32_to_i16(i32 %x, i16* %p0) { ; CHECK-LABEL: be_i32_to_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: movw %di, 2(%rsi) -; CHECK-NEXT: shrl $16, %edi -; CHECK-NEXT: movw %di, (%rsi) +; CHECK-NEXT: rorl $16, %edi +; CHECK-NEXT: movl %edi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -498,10 +497,8 @@ define void @be_i32_to_i16(i32 %x, i16* %p0) { define void @be_i32_to_i16_order(i32 %x, i16* %p0) { ; CHECK-LABEL: be_i32_to_i16_order: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shrl $16, %eax -; CHECK-NEXT: movw %ax, (%rsi) -; CHECK-NEXT: movw %di, 2(%rsi) +; CHECK-NEXT: rorl $16, %edi +; CHECK-NEXT: movl %edi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -589,9 +586,8 @@ define void @trunc_i64_to_i32(i64 %x, i32* %p) { define void @be_i64_to_i32(i64 %x, i32* %p0) { ; CHECK-LABEL: be_i64_to_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, 4(%rsi) -; CHECK-NEXT: shrq $32, %rdi -; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: rorq $32, %rdi +; CHECK-NEXT: movq %rdi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -605,10 +601,8 @@ define void @be_i64_to_i32(i64 %x, i32* %p0) { define void @be_i64_to_i32_order(i64 %x, i32* %p0) { ; CHECK-LABEL: be_i64_to_i32_order: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shrq $32, %rax -; CHECK-NEXT: movl %eax, (%rsi) -; CHECK-NEXT: movl %edi, 4(%rsi) +; CHECK-NEXT: rorq $32, %rdi +; CHECK-NEXT: movq %rdi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 From f3a6f6ccfddfbd991269a917feb4ae9beb5a1610 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Sep 2020 10:41:05 -0700 Subject: [PATCH 384/465] [X86] Pre-commit new 
test case for D87214. NFC --- llvm/test/CodeGen/X86/iabs.ll | 85 +++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index 338e66622dcd9..d9fc452510c78 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -120,3 +120,88 @@ define i64 @test_i64(i64 %a) nounwind { ret i64 %abs } +define i128 @test_i128(i128 %a) nounwind { +; X86-NO-CMOV-LABEL: test_i128: +; X86-NO-CMOV: # %bb.0: +; X86-NO-CMOV-NEXT: pushl %ebp +; X86-NO-CMOV-NEXT: pushl %ebx +; X86-NO-CMOV-NEXT: pushl %edi +; X86-NO-CMOV-NEXT: pushl %esi +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NO-CMOV-NEXT: xorl %ecx, %ecx +; X86-NO-CMOV-NEXT: negl %ebp +; X86-NO-CMOV-NEXT: movl $0, %ebx +; X86-NO-CMOV-NEXT: sbbl %edx, %ebx +; X86-NO-CMOV-NEXT: movl $0, %edi +; X86-NO-CMOV-NEXT: sbbl {{[0-9]+}}(%esp), %edi +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NO-CMOV-NEXT: sbbl %esi, %ecx +; X86-NO-CMOV-NEXT: testl %esi, %esi +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NO-CMOV-NEXT: js .LBB4_2 +; X86-NO-CMOV-NEXT: # %bb.1: +; X86-NO-CMOV-NEXT: movl %esi, %ecx +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NO-CMOV-NEXT: movl %edx, %ebx +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NO-CMOV-NEXT: .LBB4_2: +; X86-NO-CMOV-NEXT: movl %ebp, (%eax) +; X86-NO-CMOV-NEXT: movl %ebx, 4(%eax) +; X86-NO-CMOV-NEXT: movl %edi, 8(%eax) +; X86-NO-CMOV-NEXT: movl %ecx, 12(%eax) +; X86-NO-CMOV-NEXT: popl %esi +; X86-NO-CMOV-NEXT: popl %edi +; X86-NO-CMOV-NEXT: popl %ebx +; X86-NO-CMOV-NEXT: popl %ebp +; X86-NO-CMOV-NEXT: retl $4 +; +; X86-CMOV-LABEL: test_i128: +; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebp +; X86-CMOV-NEXT: pushl %ebx +; X86-CMOV-NEXT: pushl %edi +; X86-CMOV-NEXT: pushl %esi +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-CMOV-NEXT: movl 
{{[0-9]+}}(%esp), %edx +; X86-CMOV-NEXT: xorl %esi, %esi +; X86-CMOV-NEXT: negl %edi +; X86-CMOV-NEXT: movl $0, %ebx +; X86-CMOV-NEXT: sbbl %edx, %ebx +; X86-CMOV-NEXT: movl $0, %ebp +; X86-CMOV-NEXT: sbbl %ecx, %ebp +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-CMOV-NEXT: sbbl %eax, %esi +; X86-CMOV-NEXT: testl %eax, %eax +; X86-CMOV-NEXT: cmovnsl %eax, %esi +; X86-CMOV-NEXT: cmovnsl %ecx, %ebp +; X86-CMOV-NEXT: cmovnsl %edx, %ebx +; X86-CMOV-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-CMOV-NEXT: movl %edi, (%eax) +; X86-CMOV-NEXT: movl %ebx, 4(%eax) +; X86-CMOV-NEXT: movl %ebp, 8(%eax) +; X86-CMOV-NEXT: movl %esi, 12(%eax) +; X86-CMOV-NEXT: popl %esi +; X86-CMOV-NEXT: popl %edi +; X86-CMOV-NEXT: popl %ebx +; X86-CMOV-NEXT: popl %ebp +; X86-CMOV-NEXT: retl $4 +; +; X64-LABEL: test_i128: +; X64: # %bb.0: +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: negq %rax +; X64-NEXT: sbbq %rsi, %rdx +; X64-NEXT: testq %rsi, %rsi +; X64-NEXT: cmovnsq %rdi, %rax +; X64-NEXT: cmovnsq %rsi, %rdx +; X64-NEXT: retq + %tmp1neg = sub i128 0, %a + %b = icmp sgt i128 %a, -1 + %abs = select i1 %b, i128 %a, i128 %tmp1neg + ret i128 %abs +} + From 01b3e167575412792901c705032e304ef184a75d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Sep 2020 10:59:57 -0700 Subject: [PATCH 385/465] [X86] Use the same sequence for i128 ISD::ABS on 64-bit targets as we use for i64 on 32-bit targets. 
Differential Revision: https://reviews.llvm.org/D87214 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 11 ++++++++--- llvm/test/CodeGen/X86/abs.ll | 13 +++++++------ llvm/test/CodeGen/X86/iabs.ll | 13 +++++++------ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1395db57b57a0..ad8704f686c16 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -195,6 +195,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ABS , MVT::i32 , Custom); } setOperationAction(ISD::ABS , MVT::i64 , Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS , MVT::i128 , Custom); // Funnel shifts. for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { @@ -29719,9 +29721,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::ABS: { - assert(N->getValueType(0) == MVT::i64 && + assert((Subtarget.is64Bit() || N->getValueType(0) == MVT::i64) && "Unexpected type (!= i64) on ABS."); - MVT HalfT = MVT::i32; + assert((!Subtarget.is64Bit() || N->getValueType(0) == MVT::i128) && + "Unexpected type (!= i128) on ABS."); + MVT VT = N->getSimpleValueType(0); + MVT HalfT = VT == MVT::i128 ? MVT::i64 : MVT::i32; SDValue Lo, Hi, Tmp; SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); @@ -29737,7 +29742,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue(Lo.getNode(), 1)); Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi)); return; } // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. 
diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index 345830676abaa..63faafc10ec8d 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -132,13 +132,14 @@ define i64 @test_i64(i64 %a) nounwind { define i128 @test_i128(i128 %a) nounwind { ; X64-LABEL: test_i128: ; X64: # %bb.0: -; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: negq %rax -; X64-NEXT: sbbq %rsi, %rdx -; X64-NEXT: testq %rsi, %rsi -; X64-NEXT: cmovnsq %rdi, %rax -; X64-NEXT: cmovnsq %rsi, %rdx +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: xorq %rcx, %rax +; X64-NEXT: xorq %rcx, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_i128: diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index d9fc452510c78..f052718d98400 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -191,13 +191,14 @@ define i128 @test_i128(i128 %a) nounwind { ; ; X64-LABEL: test_i128: ; X64: # %bb.0: -; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: negq %rax -; X64-NEXT: sbbq %rsi, %rdx -; X64-NEXT: testq %rsi, %rsi -; X64-NEXT: cmovnsq %rdi, %rax -; X64-NEXT: cmovnsq %rsi, %rdx +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: xorq %rcx, %rax +; X64-NEXT: xorq %rcx, %rdx ; X64-NEXT: retq %tmp1neg = sub i128 0, %a %b = icmp sgt i128 %a, -1 From 70207816e35771459d053ab9faf75a50a4cb92fb Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 15:26:43 -0400 Subject: [PATCH 386/465] [InstCombine] add ptr difference tests; NFC --- llvm/test/Transforms/InstCombine/sub-gep.ll | 56 ++++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index 
cf9604223f6c1..fcb24eec349a5 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -14,6 +14,32 @@ define i64 @test_inbounds([0 x i32]* %base, i64 %idx) { ret i64 %d } +define i64 @test_partial_inbounds1([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_partial_inbounds1( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl i64 [[IDX:%.*]], 2 +; CHECK-NEXT: ret i64 [[P2_IDX]] +; + %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p1 to i64 + %i2 = ptrtoint i32* %p2 to i64 + %d = sub i64 %i2, %i1 + ret i64 %d +} + +define i64 @test_partial_inbounds2([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_partial_inbounds2( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX:%.*]], 2 +; CHECK-NEXT: ret i64 [[P2_IDX]] +; + %p1 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p1 to i64 + %i2 = ptrtoint i32* %p2 to i64 + %d = sub i64 %i2, %i1 + ret i64 %d +} + define i64 @test_inbounds_nuw([0 x i32]* %base, i64 %idx) { ; CHECK-LABEL: @test_inbounds_nuw( ; CHECK-NEXT: [[P2_IDX:%.*]] = shl nuw nsw i64 [[IDX:%.*]], 2 @@ -69,13 +95,39 @@ define i64 @test_inbounds_nuw_swapped([0 x i32]* %base, i64 %idx) { ret i64 %d } +define i64 @test_inbounds1_nuw_swapped([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_inbounds1_nuw_swapped( +; CHECK-NEXT: [[P2_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 +; CHECK-NEXT: ret i64 [[P2_IDX_NEG]] +; + %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p2 to i64 + %i2 = ptrtoint i32* %p1 to i64 + %d = sub nuw i64 %i2, %i1 + ret i64 %d +} + +define i64 @test_inbounds2_nuw_swapped([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_inbounds2_nuw_swapped( +; CHECK-NEXT: [[P2_IDX_NEG:%.*]] = 
mul i64 [[IDX:%.*]], -4 +; CHECK-NEXT: ret i64 [[P2_IDX_NEG]] +; + %p1 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p2 to i64 + %i2 = ptrtoint i32* %p1 to i64 + %d = sub nuw i64 %i2, %i1 + ret i64 %d +} + ; The sub and shl here could be nuw, but this is harder to handle. define i64 @test_inbounds_nuw_two_gep([0 x i32]* %base, i64 %idx, i64 %idx2) { ; CHECK-LABEL: @test_inbounds_nuw_two_gep( ; CHECK-NEXT: [[P1_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 ; CHECK-NEXT: [[P2_IDX_NEG_NEG:%.*]] = shl i64 [[IDX2:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]] +; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx2 From 8b300679192b317aa91a28e781fcf60d4416b0d6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 15:47:57 -0400 Subject: [PATCH 387/465] [InstCombine] improve fold of pointer differences This was supposed to be an NFC cleanup, but there's a real logic difference (did not drop 'nsw') visible in some tests in addition to an efficiency improvement. This is because in the case where we have 2 GEPs, the code was *always* swapping the operands and negating the result. But if we have 2 GEPs, we should *never* need swapping/negation AFAICT. This is part of improving flags propagation noticed with PR47430. 
--- .../InstCombine/InstCombineAddSub.cpp | 34 +++++-------------- llvm/test/Transforms/InstCombine/sub-gep.ll | 6 ++-- llvm/test/Transforms/InstCombine/sub.ll | 34 +++++++++---------- 3 files changed, 29 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 5cf6eb2a885a6..5ce32bc592d05 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1615,43 +1615,27 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, // this. bool Swapped = false; GEPOperator *GEP1 = nullptr, *GEP2 = nullptr; + if (!isa(LHS) && isa(RHS)) { + std::swap(LHS, RHS); + Swapped = true; + } - // For now we require one side to be the base pointer "A" or a constant - // GEP derived from it. - if (GEPOperator *LHSGEP = dyn_cast(LHS)) { + // Require at least one GEP with a common base pointer on both sides. + if (auto *LHSGEP = dyn_cast(LHS)) { // (gep X, ...) - X if (LHSGEP->getOperand(0) == RHS) { GEP1 = LHSGEP; - Swapped = false; - } else if (GEPOperator *RHSGEP = dyn_cast(RHS)) { + } else if (auto *RHSGEP = dyn_cast(RHS)) { // (gep X, ...) - (gep X, ...) if (LHSGEP->getOperand(0)->stripPointerCasts() == - RHSGEP->getOperand(0)->stripPointerCasts()) { - GEP2 = RHSGEP; + RHSGEP->getOperand(0)->stripPointerCasts()) { GEP1 = LHSGEP; - Swapped = false; - } - } - } - - if (GEPOperator *RHSGEP = dyn_cast(RHS)) { - // X - (gep X, ...) - if (RHSGEP->getOperand(0) == LHS) { - GEP1 = RHSGEP; - Swapped = true; - } else if (GEPOperator *LHSGEP = dyn_cast(LHS)) { - // (gep X, ...) - (gep X, ...) - if (RHSGEP->getOperand(0)->stripPointerCasts() == - LHSGEP->getOperand(0)->stripPointerCasts()) { - GEP2 = LHSGEP; - GEP1 = RHSGEP; - Swapped = true; + GEP2 = RHSGEP; } } } if (!GEP1) - // No GEP found. 
return nullptr; if (GEP2) { diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index fcb24eec349a5..f31eeb46d8823 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -124,10 +124,10 @@ define i64 @test_inbounds2_nuw_swapped([0 x i32]* %base, i64 %idx) { ; The sub and shl here could be nuw, but this is harder to handle. define i64 @test_inbounds_nuw_two_gep([0 x i32]* %base, i64 %idx, i64 %idx2) { ; CHECK-LABEL: @test_inbounds_nuw_two_gep( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX2:%.*]], 2 ; CHECK-NEXT: [[P1_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 -; CHECK-NEXT: [[P2_IDX_NEG_NEG:%.*]] = shl i64 [[IDX2:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[P1_IDX_NEG]], [[P2_IDX]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx2 diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index dbe1631226d65..437d8f8c5c023 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -505,9 +505,9 @@ define i64 @test24b(i8* %P, i64 %A){ define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( -; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i64 [[A:%.*]], 1 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i64 [[B_IDX_NEG_NEG]], -84 -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84 +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A %C = ptrtoint i16* %B to i64 @@ -520,9 +520,9 @@ define i64 @test25(i8* %P, i64 %A){ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; CHECK-LABEL: 
@test25_as1( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 -; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i16 [[TMP1]], 1 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i16 [[B_IDX_NEG_NEG]], -84 -; CHECK-NEXT: ret i16 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84 +; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A %C = ptrtoint i16 addrspace(1)* %B to i16 @@ -825,8 +825,8 @@ define i32 @test28commuted(i32 %x, i32 %y, i32 %z) { define i64 @test29(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test29( -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j @@ -838,9 +838,9 @@ define i64 @test29(i8* %foo, i64 %i, i64 %j) { define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: [[GEP1_IDX_NEG_NEG:%.*]] = shl i64 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %bit = bitcast i8* %foo to i32* %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i @@ -853,9 +853,9 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; CHECK-LABEL: @test30_as1( -; CHECK-NEXT: [[GEP1_IDX_NEG_NEG:%.*]] = shl i16 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i16 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] -; CHECK-NEXT: ret i16 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] +; 
CHECK-NEXT: ret i16 [[GEPDIFF]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i @@ -1234,10 +1234,10 @@ define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { ; "%sub = i64 %i, %j, ret i64 %sub" ; gep1 and gep2 have only one use ; CHECK-LABEL: @test58( -; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 ; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %j From cee7f3b9fee4007e9bc43958f6970bcbeef263cc Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 8 Sep 2020 12:39:28 -0700 Subject: [PATCH 388/465] [SYCL] Take into account auxiliary cmake options for Level Zero loader Currently auxiliary cmake options are located after log options. Looks like auxiliary options are not taken into account for this reason.
--- sycl/plugins/level_zero/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/level_zero/CMakeLists.txt b/sycl/plugins/level_zero/CMakeLists.txt index bb16a65a37c6e..f306f55f8f6cb 100755 --- a/sycl/plugins/level_zero/CMakeLists.txt +++ b/sycl/plugins/level_zero/CMakeLists.txt @@ -33,12 +33,12 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) -DOpenCL_INCLUDE_DIR=${OpenCL_INCLUDE_DIRS} -DCMAKE_INSTALL_PREFIX= -DCMAKE_INSTALL_LIBDIR:PATH=lib${LLVM_LIBDIR_SUFFIX} + ${AUX_CMAKE_FLAGS} LOG_DOWNLOAD 1 LOG_UPDATE 1 LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1 - ${AUX_CMAKE_FLAGS} STEP_TARGETS configure,build,install DEPENDS ocl-headers BUILD_BYPRODUCTS ${LEVEL_ZERO_LOADER} From 57313f4f4c1266a79fb99c95933994dac298870b Mon Sep 17 00:00:00 2001 From: sergei <57672082+s-kanaev@users.noreply.github.com> Date: Thu, 10 Sep 2020 13:08:30 +0300 Subject: [PATCH 389/465] [SYCL] Improve checks of kernel execution range in INT_MAX limit. (#2423) Improved check is for the following use-cases. When we have a lot of small work-groups the number of work-items may still be out of INT_MAX limit Also, sum of range and offset may exceed the limitation while each value is within INT_MAX limit on its own. 
Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/defines.hpp | 8 +- sycl/include/CL/sycl/handler.hpp | 90 +++++++++---- .../basic_tests/range_offset_fit_in_int.cpp | 124 +++++++++++++++--- 3 files changed, 175 insertions(+), 47 deletions(-) diff --git a/sycl/include/CL/sycl/detail/defines.hpp b/sycl/include/CL/sycl/detail/defines.hpp index c26b2e18d2b9b..a34b53e9bfe23 100644 --- a/sycl/include/CL/sycl/detail/defines.hpp +++ b/sycl/include/CL/sycl/detail/defines.hpp @@ -38,11 +38,15 @@ #define SYCL_EXTERNAL #endif -#if defined(__SYCL_ID_QUERIES_FIT_IN_INT__) && __has_builtin(__builtin_assume) +#ifndef __SYCL_ID_QUERIES_FIT_IN_INT__ +#define __SYCL_ID_QUERIES_FIT_IN_INT__ 0 +#endif + +#if __SYCL_ID_QUERIES_FIT_IN_INT__ && __has_builtin(__builtin_assume) #define __SYCL_ASSUME_INT(x) __builtin_assume((x) <= INT_MAX) #else #define __SYCL_ASSUME_INT(x) -#if defined(__SYCL_ID_QUERIES_FIT_IN_INT__) && !__has_builtin(__builtin_assume) +#if __SYCL_ID_QUERIES_FIT_IN_INT__ && !__has_builtin(__builtin_assume) #warning "No assumptions will be emitted due to no __builtin_assume available" #endif #endif diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 50024cb18a14f..7506ccacd19ef 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -143,7 +143,7 @@ struct check_fn_signature { __SYCL_EXPORT device getDeviceFromHandler(handler &); -#if defined(__SYCL_ID_QUERIES_FIT_IN_INT__) +#if __SYCL_ID_QUERIES_FIT_IN_INT__ template struct NotIntMsg; template struct NotIntMsg> { @@ -159,16 +159,65 @@ template struct NotIntMsg> { }; #endif +#if __SYCL_ID_QUERIES_FIT_IN_INT__ +template +typename std::enable_if::value || + std::is_same::value>::type +checkValueRangeImpl(ValT V) { + static constexpr size_t Limit = + static_cast((std::numeric_limits::max)()); + if (V > Limit) + throw runtime_error(NotIntMsg::Msg, PI_INVALID_VALUE); +} +#endif + template typename std::enable_if>::value || std::is_same>::value>::type 
checkValueRange(const T &V) { -#if defined(__SYCL_ID_QUERIES_FIT_IN_INT__) - static constexpr size_t Limit = - static_cast((std::numeric_limits::max)()); +#if __SYCL_ID_QUERIES_FIT_IN_INT__ for (size_t Dim = 0; Dim < Dims; ++Dim) - if (V[Dim] > Limit) - throw runtime_error(NotIntMsg::Msg, PI_INVALID_VALUE); + checkValueRangeImpl(V[Dim]); + + { + unsigned long long Product = 1; + for (size_t Dim = 0; Dim < Dims; ++Dim) { + Product *= V[Dim]; + // check value now to prevent product overflow in the end + checkValueRangeImpl(Product); + } + } +#else + (void)V; +#endif +} + +template +void checkValueRange(const range &R, const id &O) { +#if __SYCL_ID_QUERIES_FIT_IN_INT__ + checkValueRange(R); + checkValueRange(O); + + for (size_t Dim = 0; Dim < Dims; ++Dim) { + unsigned long long Sum = R[Dim] + O[Dim]; + + checkValueRangeImpl>(Sum); + } +#else + (void)R; + (void)O; +#endif +} + +template +typename std::enable_if>::value>::type +checkValueRange(const T &V) { +#if __SYCL_ID_QUERIES_FIT_IN_INT__ + checkValueRange(V.get_global_range()); + checkValueRange(V.get_local_range()); + checkValueRange(V.get_offset()); + + checkValueRange(V.get_global_range(), V.get_offset()); #else (void)V; #endif @@ -982,8 +1031,7 @@ class __SYCL_EXPORT handler { (void)WorkItemOffset; kernel_parallel_for(KernelFunc); #else - detail::checkValueRange(NumWorkItems); - detail::checkValueRange(WorkItemOffset); + detail::checkValueRange(NumWorkItems, WorkItemOffset); MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); StoreLambda(std::move(KernelFunc)); MCGType = detail::CG::KERNEL; @@ -1015,9 +1063,7 @@ class __SYCL_EXPORT handler { (void)ExecutionRange; kernel_parallel_for(KernelFunc); #else - detail::checkValueRange(ExecutionRange.get_global_range()); - detail::checkValueRange(ExecutionRange.get_local_range()); - detail::checkValueRange(ExecutionRange.get_offset()); + detail::checkValueRange(ExecutionRange); MNDRDesc.set(std::move(ExecutionRange)); StoreLambda(std::move(KernelFunc)); 
MCGType = detail::CG::KERNEL; @@ -1225,9 +1271,7 @@ class __SYCL_EXPORT handler { #else nd_range ExecRange = nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); - detail::checkValueRange(ExecRange.get_global_range()); - detail::checkValueRange(ExecRange.get_local_range()); - detail::checkValueRange(ExecRange.get_offset()); + detail::checkValueRange(ExecRange); MNDRDesc.set(std::move(ExecRange)); StoreLambda(std::move(KernelFunc)); MCGType = detail::CG::KERNEL; @@ -1278,8 +1322,7 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); verifyKernelInvoc(Kernel); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - detail::checkValueRange(NumWorkItems); - detail::checkValueRange(WorkItemOffset); + detail::checkValueRange(NumWorkItems, WorkItemOffset); MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); MCGType = detail::CG::KERNEL; extractArgsAndReqs(); @@ -1298,9 +1341,7 @@ class __SYCL_EXPORT handler { throwIfActionIsCreated(); verifyKernelInvoc(Kernel); MKernel = detail::getSyclObjImpl(std::move(Kernel)); - detail::checkValueRange(NDRange.get_global_range()); - detail::checkValueRange(NDRange.get_local_range()); - detail::checkValueRange(NDRange.get_offset()); + detail::checkValueRange(NDRange); MNDRDesc.set(std::move(NDRange)); MCGType = detail::CG::KERNEL; extractArgsAndReqs(); @@ -1400,8 +1441,7 @@ class __SYCL_EXPORT handler { (void)WorkItemOffset; kernel_parallel_for(KernelFunc); #else - detail::checkValueRange(NumWorkItems); - detail::checkValueRange(WorkItemOffset); + detail::checkValueRange(NumWorkItems, WorkItemOffset); MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); MCGType = detail::CG::KERNEL; @@ -1437,9 +1477,7 @@ class __SYCL_EXPORT handler { (void)NDRange; kernel_parallel_for(KernelFunc); #else - detail::checkValueRange(NDRange.get_global_range()); - detail::checkValueRange(NDRange.get_local_range()); - detail::checkValueRange(NDRange.get_offset()); + 
detail::checkValueRange(NDRange); MNDRDesc.set(std::move(NDRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); MCGType = detail::CG::KERNEL; @@ -1520,9 +1558,7 @@ class __SYCL_EXPORT handler { #else nd_range ExecRange = nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); - detail::checkValueRange(ExecRange.get_global_range()); - detail::checkValueRange(ExecRange.get_local_range()); - detail::checkValueRange(ExecRange.get_offset()); + detail::checkValueRange(ExecRange); MNDRDesc.set(std::move(ExecRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); StoreLambda(std::move(KernelFunc)); diff --git a/sycl/test/basic_tests/range_offset_fit_in_int.cpp b/sycl/test/basic_tests/range_offset_fit_in_int.cpp index acf86c920594c..658eb950d5538 100644 --- a/sycl/test/basic_tests/range_offset_fit_in_int.cpp +++ b/sycl/test/basic_tests/range_offset_fit_in_int.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-id-queries-fit-in-int %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-id-queries-fit-in-int -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out #include @@ -37,28 +37,37 @@ void test() { static constexpr size_t OutOfLimitsSize = static_cast(INT_MAX) + 1; - S::range<1> RangeOutOfLimits{OutOfLimitsSize}; - S::range<1> RangeInLimits{1}; - S::id<1> OffsetOutOfLimits{OutOfLimitsSize}; - S::id<1> OffsetInLimits{1}; - S::nd_range<1> NDRange_ROL_LIL_OIL{RangeOutOfLimits, RangeInLimits, + S::range<2> RangeOutOfLimits{OutOfLimitsSize, 1}; + S::range<2> RangeInLimits{1, 1}; + S::range<2> RangeInLimits_POL{OutOfLimitsSize / 2, 3}; + S::range<2> RangeInLimits_Large{OutOfLimitsSize / 2, 1}; + S::id<2> OffsetOutOfLimits{OutOfLimitsSize, 1}; + S::id<2> OffsetInLimits{1, 1}; + S::id<2> OffsetInLimits_Large{(OutOfLimitsSize / 4) * 3, 1}; + S::nd_range<2> NDRange_ROL_LIL_OIL{RangeOutOfLimits, RangeInLimits, OffsetInLimits}; - S::nd_range<1> NDRange_RIL_LOL_OIL{RangeInLimits, RangeOutOfLimits, + S::nd_range<2> 
NDRange_RIL_LOL_OIL{RangeInLimits, RangeOutOfLimits, OffsetInLimits}; - S::nd_range<1> NDRange_RIL_LIL_OOL{RangeInLimits, RangeInLimits, + S::nd_range<2> NDRange_RIL_LIL_OOL{RangeInLimits, RangeInLimits, OffsetOutOfLimits}; - S::nd_range<1> NDRange_RIL_LIL_OIL(RangeInLimits, RangeInLimits, + S::nd_range<2> NDRange_RIL_LIL_OIL(RangeInLimits, RangeInLimits, OffsetInLimits); + S::nd_range<2> NDRange_RIL_LIL_OIL_POL(S::range<2>{OutOfLimitsSize / 2, 3}, + S::range<2>{OutOfLimitsSize / 2, 1}); + S::nd_range<2> NDRange_RIL_LIL_OIL_SOL( + S::range<2>{OutOfLimitsSize / 2, 1}, S::range<2>{OutOfLimitsSize / 2, 1}, + S::id<2>{(OutOfLimitsSize / 4) * 3, (OutOfLimitsSize / 4) * 3}); int Data = 0; S::buffer Buf{&Data, 1}; + // no offset, either dim of range exceeds limit try { Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); CGH.parallel_for(RangeOutOfLimits, - [=](S::id<1> Id) { Acc[0] += 1; }); + [=](S::id<2> Id) { Acc[0] += 1; }); }); assert(false && "Exception expected"); @@ -68,23 +77,41 @@ void test() { assert(false && "Unexpected exception catched"); } + // no offset, all dims of range are in limits try { Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); CGH.parallel_for(RangeInLimits, - [Acc](S::id<1> Id) { Acc[0] += 1; }); + [Acc](S::id<2> Id) { Acc[0] += 1; }); }); } catch (...) { assert(false && "Unexpected exception catched"); } + // no offset, all dims of range are in limits, linear id exceeds limits + try { + Queue.submit([&](S::handler &CGH) { + auto Acc = Buf.get_access(CGH); + + CGH.parallel_for(RangeInLimits_POL, + [Acc](S::id<2> Id) { Acc[0] += 1; }); + }); + + assert(false && "Exception expected"); + } catch (S::runtime_error &E) { + checkRangeException(E); + } catch (...) 
{ + assert(false && "Unexpected exception catched"); + } + + // small offset, either dim of range exceeds limit try { Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); CGH.parallel_for(RangeOutOfLimits, OffsetInLimits, - [Acc](S::id<1> Id) { Acc[0] += 1; }); + [Acc](S::id<2> Id) { Acc[0] += 1; }); }); assert(false && "Exception expected"); @@ -94,12 +121,30 @@ void test() { assert(false && "Unexpected exception catched"); } + // large offset, neither dim of range exceeds limit, offset + range > limit + try { + Queue.submit([&](S::handler &CGH) { + auto Acc = Buf.get_access(CGH); + + CGH.parallel_for( + RangeInLimits_Large, OffsetInLimits_Large, + [Acc](S::id<2> Id) { Acc[0] += 1; }); + }); + + assert(false && "Exception expected"); + } catch (S::runtime_error &E) { + checkRangeException(E); + } catch (...) { + assert(false && "Unexpected exception catched"); + } + + // large offset, neither dim of range exceeds limit try { Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); CGH.parallel_for(RangeInLimits, OffsetOutOfLimits, - [Acc](S::id<1> Id) { Acc[0] += 1; }); + [Acc](S::id<2> Id) { Acc[0] += 1; }); }); assert(false && "Exception expected"); @@ -109,66 +154,109 @@ void test() { assert(false && "Unexpected exception catched"); } + // small offset, neither range dim exceeds limit try { Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); CGH.parallel_for(RangeInLimits, OffsetInLimits, - [Acc](S::id<1> Id) { Acc[0] += 1; }); + [Acc](S::id<2> Id) { Acc[0] += 1; }); }); } catch (...) 
{ assert(false && "Unexpected exception catched"); } + // small offset, global range's dim is out of limits try { Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); CGH.parallel_for( - NDRange_ROL_LIL_OIL, [Acc](S::nd_item<1> Id) { Acc[0] += 1; }); + NDRange_ROL_LIL_OIL, [Acc](S::nd_item<2> Id) { Acc[0] += 1; }); }); + + assert(false && "Exception expected"); } catch (S::runtime_error &E) { checkRangeException(E); } catch (...) { assert(false && "Unexpected exception catched"); } + // small offset, local range is out of limits try { Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); CGH.parallel_for( - NDRange_RIL_LOL_OIL, [Acc](S::nd_item<1> Id) { Acc[0] += 1; }); + NDRange_RIL_LOL_OIL, [Acc](S::nd_item<2> Id) { Acc[0] += 1; }); }); + + assert(false && "Exception expected"); } catch (S::runtime_error &E) { checkRangeException(E); } catch (...) { assert(false && "Unexpected exception catched"); } + // large offset, ranges are in limits try { Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); CGH.parallel_for( - NDRange_RIL_LIL_OOL, [Acc](S::nd_item<1> Id) { Acc[0] += 1; }); + NDRange_RIL_LIL_OOL, [Acc](S::nd_item<2> Id) { Acc[0] += 1; }); }); + + assert(false && "Exception expected"); } catch (S::runtime_error &E) { checkOffsetException(E); } catch (...) { assert(false && "Unexpected exception catched"); } + // small offset, ranges are in limits try { Queue.submit([&](S::handler &CGH) { auto Acc = Buf.get_access(CGH); CGH.parallel_for( - NDRange_RIL_LIL_OIL, [Acc](S::nd_item<1> Id) { Acc[0] += 1; }); + NDRange_RIL_LIL_OIL, [Acc](S::nd_item<2> Id) { Acc[0] += 1; }); }); } catch (...) 
{ assert(false && "Unexpected exception catched"); } + + // small offset, ranges are in limits, linear id out of limits + try { + Queue.submit([&](S::handler &CGH) { + auto Acc = Buf.get_access(CGH); + + CGH.parallel_for( + NDRange_RIL_LIL_OIL_POL, [Acc](S::nd_item<2> Id) { Acc[0] += 1; }); + }); + + assert(false && "Exception expected"); + } catch (S::runtime_error &E) { + checkRangeException(E); + } catch (...) { + assert(false && "Unexpected exception catched"); + } + + // small offset, ranges are in limits, range + offset exceeds limits + try { + Queue.submit([&](S::handler &CGH) { + auto Acc = Buf.get_access(CGH); + + CGH.parallel_for( + NDRange_RIL_LIL_OIL_POL, [Acc](S::nd_item<2> Id) { Acc[0] += 1; }); + }); + + assert(false && "Exception expected"); + } catch (S::runtime_error &E) { + checkRangeException(E); + } catch (...) { + assert(false && "Unexpected exception catched"); + } } int main(void) { From 2aab33788e4c7d0a052f431145a5bd36d122f653 Mon Sep 17 00:00:00 2001 From: Ralender Date: Thu, 10 Sep 2020 17:17:55 +0200 Subject: [PATCH 390/465] [SYCL][Driver] Respect the -target cross-compilation flag. (#2417) Currently when compiling with for sycl with a -target flag, clang will invoke the device compilation with an auxiliary triple of the machine performing the compilation instead of the host target completely ignoring the -target flag. This patch fixes this. 
--- clang/lib/Driver/ToolChains/Clang.cpp | 4 +++- clang/test/Driver/sycl-device.cpp | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0230fff80dea0..09d0d6eb64d21 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4155,7 +4155,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fenable-sycl-dae"); // Pass the triple of host when doing SYCL - auto AuxT = llvm::Triple(llvm::sys::getProcessTriple()); + llvm::Triple AuxT = C.getDefaultToolChain().getTriple(); + if (Args.hasFlag(options::OPT_fsycl_device_only, OptSpecifier(), false)) + AuxT = llvm::Triple(llvm::sys::getProcessTriple()); std::string NormalizedTriple = AuxT.normalize(); CmdArgs.push_back("-aux-triple"); CmdArgs.push_back(Args.MakeArgString(NormalizedTriple)); diff --git a/clang/test/Driver/sycl-device.cpp b/clang/test/Driver/sycl-device.cpp index d5e135323498b..15ca3f617c3ae 100644 --- a/clang/test/Driver/sycl-device.cpp +++ b/clang/test/Driver/sycl-device.cpp @@ -22,3 +22,9 @@ // RUN: %clang -### -fsycl %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-SYCL-STD_VERSION %s // CHECK-SYCL-STD_VERSION: clang{{.*}} "-sycl-std=2020" + +/// Check that -aux-triple is set correctly +// RUN: %clang -### -fsycl -target aarch64-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-SYCL-AUX-TRIPLE %s +// TODO: %clang -### -fsycl -fsycl-device-only -target aarch64-linux-gnu +// CHECK-SYCL-AUX-TRIPLE: clang{{.*}} "-aux-triple" "aarch64-unknown-linux-gnu" From ca012c64f974056672addb0d7f18d7ab4c1227b0 Mon Sep 17 00:00:00 2001 From: rbegam <60943159+rbegam@users.noreply.github.com> Date: Thu, 10 Sep 2020 08:18:29 -0700 Subject: [PATCH 391/465] [SYCL] Fix the return data type for subgroup info queries. 
(#2453) Signed-off-by: rbegam --- sycl/plugins/level_zero/pi_level_zero.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 6ac436a6497c1..ae141b3c1f78e 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2754,13 +2754,13 @@ pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device, ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); if (ParamName == PI_KERNEL_MAX_SUB_GROUP_SIZE) { - ReturnValue(size_t{ZeKernelProperties.maxSubgroupSize}); + ReturnValue(uint32_t{ZeKernelProperties.maxSubgroupSize}); } else if (ParamName == PI_KERNEL_MAX_NUM_SUB_GROUPS) { - ReturnValue(size_t{ZeKernelProperties.maxNumSubgroups}); + ReturnValue(uint32_t{ZeKernelProperties.maxNumSubgroups}); } else if (ParamName == PI_KERNEL_COMPILE_NUM_SUB_GROUPS) { - ReturnValue(size_t{ZeKernelProperties.requiredNumSubGroups}); + ReturnValue(uint32_t{ZeKernelProperties.requiredNumSubGroups}); } else if (ParamName == PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL) { - ReturnValue(size_t{ZeKernelProperties.requiredSubgroupSize}); + ReturnValue(uint32_t{ZeKernelProperties.requiredSubgroupSize}); } else { die("piKernelGetSubGroupInfo: parameter not implemented"); return {}; From a7ffe039d6ab91c9efb403efd4bc6b57b914ce45 Mon Sep 17 00:00:00 2001 From: jbrodman Date: Thu, 10 Sep 2020 11:20:39 -0400 Subject: [PATCH 392/465] [SYCL][Doc] Move USM extension doc to Khronos (#2457) Signed-off-by: James Brodman --- sycl/doc/extensions/USM/USM.adoc | 788 +------------------------------ 1 file changed, 4 insertions(+), 784 deletions(-) diff --git a/sycl/doc/extensions/USM/USM.adoc b/sycl/doc/extensions/USM/USM.adoc index 70a3b0b003ec3..6a001d771568e 100644 --- a/sycl/doc/extensions/USM/USM.adoc +++ b/sycl/doc/extensions/USM/USM.adoc @@ -1,792 +1,12 @@ = SYCL(TM) Proposals: Unified Shared Memory James Brodman ; Ben 
Ashbaugh ; Michael Kinsner -v0.99 +v0.999 :source-highlighter: pygments :icons: font :y: icon:check[role="green"] :n: icon:times[role="red"] +== Please Refer to SYCL 2020 -== Introduction -IMPORTANT: This specification is a draft. - -NOTE: Khronos(R) is a registered trademark and SYCL(TM) is a trademark of the Khronos Group, Inc. - -CAUTION: This document is better viewed when rendered as html with asciidoctor. GitHub does not render image icons. - -This document presents a series of changes proposed for a future version of the SYCL Specification. The goal of these proposals is to reduce the complexity and verbosity of using SYCL for programmers. These proposals also seek to reduce the barrier to integrate SYCL code into existing C++ codebases by introducing new modes that reduce the amount of code that must be changed to interface the two codes. - -== SYCL Memory Management -This section describes new properties and routines for pointer-based memory management interfaces in SYCL. These routines augment, rather than replace, the existing buffer-based interfaces in SYCL 1.2.1. - -=== Unified Addressing -Unified Addressing guarantees that all devices will use a unified address space. Pointer values in the unified address space will always refer to the same location in memory. The unified address space encompasses the host and one or more devices. Note that this does not require addresses in the unified address space to be accessible on all devices, just that pointer values will be consistent. -[cols="^25,^15,60",options="header"] - -=== Unified Shared Memory -Unified Shared Memory (USM) is a capability that, when available, provides the ability to create allocations that are visible to both host and device(s). USM builds upon Unified Addressing to define a shared address space where pointer values in this space always refer to the same location in memory. 
USM defines multiple tiers of increasing capability described in the following sections: - - * Explicit USM - * Restricted USM - * Concurrent USM - * System USM - -NOTE: All utility functions described below are located in the `sycl` namespace unless otherwise indicated. - -==== Explicit USM -Explicit USM defines capabilities for explicitly managing device memory. Programmers directly allocate device memory, and data must be explicitly copied between the host and a device. Device allocations are obtained through a SYCL device allocator instead of the system allocator. Device allocations are not accessible on the host, but the pointer values remain consistent on account of Unified Addressing. Greater detail about how allocations are used is described by the following tables. - -==== Restricted USM -Restricted USM defines capabilities for implicitly sharing data between host and devices. However, Restricted USM, as the name implies, is limited in that host and device may not concurrently compute on memory in the shared address space. Restricted USM builds upon Explicit USM by adding two new types of allocations, `host` and `shared`. Allocations are obtained through SYCL allocator instead of the system allocator. `shared` allocations may be limited by device memory. Greater detail about the allocation types defined in Restricted USM and their usage is described by the following tables. - -==== Concurrent USM -Concurrent USM builds upon Restricted USM by enabling concurrent access to `shared` allocations between host and devices. Additionally, some implementations may support a working set of `shared` allocations larger than device memory. - -==== System USM -System USM extends upon the previous tiers by performing all `shared` allocations with the normal system memory allocation routines. In particular, programmers may now use `malloc` or C++ `new` instead of `sycl_malloc` to create `shared` allocations. Likewise, `free` and `delete` are used instead of `sycl::free`. 
Note that `host` and `device` allocations are unaffected by this change and must still be allocated using their respective USM functions. - -=== USM Allocations -.Unified Shared Memory Allocation Types -[source,cpp] ----- -namespace sycl { - namespace usm { - enum class alloc { - host, - device, - shared, - unknown - }; - } -} ----- - -[cols="^25,75",options="header"] -|=== - -|Allocation Type |Description -|`host` -|Allocations in host memory that are accessible by a device. - -|`device` -|Allocations in device memory that are *not* accessible by the host. - -|`shared` -|Allocations in shared memory that are accessible by both host and device. -|=== - -[cols="6*^",options="header", stripes=none] -|=== -|Allocation Type |Initial Location |Accessible By | |Migratable To | -.3+^.^|`device` -.3+^.^|`device` -|`host` -|{n} No -|`host` -|{n} No - -|`device` -|{y} Yes -|`device` -|N/A - -|Another `device` -|Optional (P2P) -|Another `device` -|{n} No - -.2+^.^|`host` -.2+^.^|`host` -|`host` -|{y} Yes -|`host` -|N/A - -|Any `device` -|{y} Yes (likely over PCIe) -|`device` -|{n} No - -.3+^.^|`shared` -.3+^.^|`host` / `device` / Unspecified -|`host` -|{y} Yes -|`host` -|{y} Yes - -|`device` -|{y} Yes -|`device` -|{y} Yes -|Another `device` -|Optional (P2P) -|Another `device` -|Optional - -|=== - -=== C++ Allocator Interface -.usm_allocator Interface -[source, cpp] ----- -template -class usm_allocator { -public: - using value_type = T; - using pointer = T *; - using const_pointer = const T *; - using reference = T &; - using const_reference = const T &; - -public: - template struct rebind { - typedef usm_allocator other; - }; - - usm_allocator() = delete; - usm_allocator(const context &ctxt, const device &dev); - usm_allocator(const queue &q); - usm_allocator(const usm_allocator &other); - - // Construct an object - // Note: AllocKind == alloc::device is not allowed - template < - usm::alloc AllocT = AllocKind, - typename std::enable_if::type = 0> - void construct(pointer 
Ptr, const_reference Val); - - template < - usm::alloc AllocT = AllocKind, - typename std::enable_if::type = 0> - void construct(pointer Ptr, const_reference Val) { - throw feature_not_supported( - "Device pointers do not support construct on host"); - } - - // Destroy an object - // Note:: AllocKind == alloc::device is not allowed - template < - usm::alloc AllocT = AllocKind, - typename std::enable_if::type = 0> - void destroy(pointer Ptr); - - template < - usm::alloc AllocT = AllocKind, - typename std::enable_if::type = 0> - void destroy(pointer Ptr) { - throw feature_not_supported( - "Device pointers do not support destroy on host"); - } - - // Note:: AllocKind == alloc::device is not allowed - template < - usm::alloc AllocT = AllocKind, - typename std::enable_if::type = 0> - pointer address(reference Val); - - template < - usm::alloc AllocT = AllocKind, - typename std::enable_if::type = 0> - pointer address(reference Val) const { - throw feature_not_supported( - "Device pointers do not support address on host"); - } - - template < - usm::alloc AllocT = AllocKind, - typename std::enable_if::type = 0> - const_pointer address(const_reference Val); - - template < - usm::alloc AllocT = AllocKind, - typename std::enable_if::type = 0> - const_pointer address(const_reference Val) const { - throw feature_not_supported( - "Device pointers do not support address on host"); - } - - // Allocate memory - pointer allocate(size_t Size); - - // Deallocate memory - void deallocate(pointer Ptr, size_t size); -}; ----- - -''' -=== Utility Functions - -While the modern C++ `usm_allocator` interface is sufficient for specifying USM allocations and deallocations, many programmers may prefer C-style `malloc`-influenced APIs. As a convenience to programmers, `malloc`-style APIs are also defined. Additionally, other utility functions are specified in the following sections to perform various operations such as memory copies and initializations as well as to provide performance hints. 
- -==== Explicit USM -===== malloc -[source,cpp] ----- -(1) -void* sycl::malloc_device(size_t num_bytes, - const sycl::device& dev, - const sycl::context& ctxt); - -(2) -template -T* sycl::malloc_device(size_t count, - const sycl::device& dev, - const sycl::context& ctxt); ----- - -Parameters:: - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::device& dev` - the SYCL `device` to allocate on - * `const sycl::context& ctxt` - the SYCL `context` to which `device` belongs - -Return value:: Returns a pointer to the newly allocated memory on the specified `device` on success. Memory allocated by `sycl::malloc_device` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -[source,cpp] ----- -(1) -void* sycl::malloc_device(size_t num_bytes, - const sycl::queue& q); - -(2) -template -T* sycl::malloc_device(size_t count, - const sycl::queue& q); ----- - -Parameters:: - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::queue& q` - the SYCL `q` that provides the `device` and `context` to allocate against - -Return value:: Returns a pointer to the newly allocated memory on the `device` associated with `q` on success. Memory allocated by `sycl::malloc_device` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -===== aligned_alloc -[source,cpp] ----- -(1) -void* sycl::aligned_alloc_device(size_t alignment, - size_t num_bytes, - const sycl::device& dev, - const sycl::context& ctxt); - -(2) -template -T* sycl::aligned_alloc_device(size_t alignment, - size_t count, - const sycl::device& dev, - const sycl::context& ctxt); ----- - -Parameters:: - * `size_t alignment` - specifies the byte alignment. Must be a valid alignment supported by the implementation. 
- * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::device& dev` - the `device` to allocate on - * `const sycl::context& ctxt` - the SYCL `context` to which `device` belongs -Return value:: Returns a pointer to the newly allocated memory on the specified `device` on success. Memory allocated by `sycl::aligned_alloc_device` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -[source,cpp] ----- -(1) -void* sycl::aligned_alloc_device(size_t alignment, - size_t size, - const sycl::queue& q); - -(2) -template -T* sycl::aligned_alloc_device(size_t alignment, - size_t count, - const sycl::queue& q); ----- - -Parameters:: - * `size_t alignment` - specifies the byte alignment. Must be a valid alignment supported by the implementation. - * (1) `size_t size` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::queue& q` - the SYCL `q` that provides the `device` and `context` to allocate against - -Return value:: Returns a pointer to the newly allocated memory on the `device` associated with `q` on success. Memory allocated by `sycl::aligned_alloc_device` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -===== memcpy -[source,cpp] ----- -class handler { - ... - public: - ... - void memcpy(void* dest, const void* src, size_t num_bytes); -}; - -class queue { - ... - public: - ... - event memcpy(void* dest, const void* src, size_t num_bytes); -}; ----- -Parameters:: - * `void* dest` - pointer to the destination memory - * `const void* src` - pointer to the source memory - * `size_t num_bytes` - number of bytes to copy -Return value:: Returns an event representing the copy operation. - -===== memset -[source,cpp] ----- -class handler { - ... - public: - ... - void memset(void* ptr, int value, size_t num_bytes); -}; - -class queue { - ... 
- public: - ... - event memset(void* ptr, int value, size_t num_bytes); -}; ----- -Parameters:: - * `void* ptr` - pointer to the memory to fill - * `int value` - value to be set. Value is interpreted as an `unsigned char` - * `size_t num_bytes` - number of bytes to fill -Return value:: Returns an event representing the fill operation. - -===== fill -[source,cpp] ----- -class handler { - ... - public: - ... - template - void fill(void* ptr, const T& pattern, size_t count) -}; - -class queue { - ... - public: - ... - template - event fill(void* ptr, const T& pattern, size_t count); -}; ----- -Parameters:: - * `void* ptr` - pointer to the memory to fill - * `const T& pattern` - pattern to be filled. `T` should be trivially copyable. - * `size_t count` - number of times to fill `pattern` into `ptr` -Return value:: Returns an event representing the fill operation or void if on the `handler`. - -''' -==== Restricted USM -Restricted USM includes all of the Utility Functions of Explicit USM. It additionally introduces new functions to support `host` and `shared` allocations. - -===== malloc -[source,cpp] ----- -(1) -void* sycl::malloc_host(size_t num_bytes, const sycl::context& ctxt); -(2) -template -T* sycl::malloc_host(size_t count, const sycl::context& ctxt); ----- - -Parameters:: - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::context& ctxt` - the SYCL `context` that contains the devices that will access the `host` allocation -Return value:: Returns a pointer to the newly allocated `host` memory on success. Memory allocated by `sycl::malloc_host` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. 
- -[source,cpp] ----- -(1) -void* sycl::malloc_host(size_t num_bytes, const sycl::queue& q); -(2) -template -T* sycl::malloc_host(size_t count, const sycl::queue& q); ----- - -Parameters:: - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::queue& q` - the SYCL `queue` whose `context` contains the devices that will access the `host` allocation -Return value:: Returns a pointer to the newly allocated `host` memory on success. Memory allocated by `sycl::malloc_host` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -[source,cpp] ----- -(1) -void* sycl::malloc_shared(size_t num_bytes, - const sycl::device& dev, - const sycl::context& ctxt); -(2) -template -T* sycl::malloc_shared(size_t count, - const sycl::device& dev, - const sycl::context& ctxt); ----- - -Parameters:: - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::device& dev` - the SYCL device to allocate on - * `const sycl::context& ctxt` - the SYCL `context` to which `device` belongs -Return value:: Returns a pointer to the newly allocated `shared` memory on the specified `device` on success. Memory allocated by `sycl::malloc_shared` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -[source,cpp] ----- -(1) -void* sycl::malloc_shared(size_t num_bytes, - const sycl::queue& q); -(2) -template -T* sycl::malloc_shared(size_t count, - const sycl::queue& q); ----- - -Parameters:: - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::queue& q` - the SYCL `q` that provides the `device` and `context` to allocate against - -Return value:: Returns a pointer to the newly allocated `shared` memory on the `device` associated with `q` on success. 
Memory allocated by `sycl::malloc_shared` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -===== aligned_alloc -[source,cpp] ----- -(1) -void* sycl::aligned_alloc_host(size_t alignment, size_t num_bytes, const sycl::context& ctxt); -(2) -template -T* sycl::aligned_alloc_host(size_t alignment, size_t count, const sycl::context& ctxt); ----- - -Parameters:: - * `size_t alignment` - specifies the byte alignment. Must be a valid alignment supported by the implementation. - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::context& ctxt` - the SYCL `context` that contains the devices that will access the `host` allocation -Return value:: Returns a pointer to the newly allocated `host` memory on success. Memory allocated by `sycl::aligned_alloc_host` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -[source,cpp] ----- -(1) -void* sycl::aligned_alloc_host(size_t alignment, size_t num_bytes, const sycl::queue& q); -(2) -template -void* sycl::aligned_alloc_host(size_t alignment, size_t count, const sycl::queue& q); ----- - -Parameters:: - * `size_t alignment` - specifies the byte alignment. Must be a valid alignment supported by the implementation. - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::queue& q` - the SYCL `q` whose `context` contains the devices that will access the `host` allocation -Return value:: Returns a pointer to the newly allocated `host` memory on success. Memory allocated by `sycl::aligned_alloc_host` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. 
- -[source,cpp] ----- -(1) -void* sycl::aligned_alloc_shared(size_t alignment, - size_t num_bytes, - const sycl::device& dev, - const sycl::context& ctxt); -(2) -template -T* sycl::aligned_alloc_shared(size_t alignment, - size_t count, - const sycl::device& dev, - const sycl::context& ctxt); ----- - -Parameters:: - * `size_t alignment` - specifies the byte alignment. Must be a valid alignment supported by the implementation. - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::device& dev` - the SYCL `device` to allocate on - * `const sycl::context& ctxt` - the SYCL `context` to which `device` belongs -Return value:: Returns a pointer to the newly allocated `shared` memory on the specified `device` on success. Memory allocated by `sycl::aligned_alloc_shared` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -[source,cpp] ----- -(1) -void* sycl::aligned_alloc_shared(size_t alignment, - size_t num_bytes, - const sycl::queue& q); -(2) -template -T* sycl::aligned_alloc_shared(size_t alignment, - size_t count, - const sycl::queue& q); ----- - -Parameters:: - * `size_t alignment` - specifies the byte alignment. Must be a valid alignment supported by the implementation. - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::queue& q` - the SYCL `q` that provides the `device` and `context` to allocate against -Return value:: Returns a pointer to the newly allocated `shared` memory on the `device` associated with `q` on success. Memory allocated by `sycl::aligned_alloc_shared` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. 
- -===== Performance Hints -Programmers may provide hints to the runtime that data should be made available on a device earlier than Unified Shared Memory would normally require it to be available. This can be accomplished through enqueueing prefetch commands. Prefetch commands may not be overlapped with kernel execution in Restricted USM. - -===== prefetch -[source,cpp] ----- -class handler { - ... - public: - ... - void prefetch(const void* ptr, size_t num_bytes); -}; - -class queue { - ... - public: - ... - void prefetch(const void* ptr, size_t num_bytes); -}; ----- -Parameters:: - * `const void* ptr` - pointer to the memory to be prefetched to the device - * `size_t num_bytes` - number of bytes requested to be prefetched -Return value:: none - -''' -==== Concurrent USM -Concurrent USM contains all the utility functions of Explicit USM and Restricted USM. It introduces a new function, `sycl::queue::mem_advise`, that allows programmers to provide additional information to the underlying runtime about how different allocations are used. - -===== Performance Hints -===== prefetch -In Concurrent USM, prefetch commands may be overlapped with kernel execution. - -===== mem_advise -[source,cpp] ----- -class queue { - ... - public: - ... - event mem_advise(const void *addr, size_t num_bytes, int advice); -}; ----- - -Parameters:: - * `void* addr` - address of allocation - * `size_t num_bytes` - number of bytes in the allocation - * `int advice` - device-defined advice for the specified allocation -Return Value:: Returns an event representing the operation. 
- -''' -==== General -===== malloc -[source,cpp] ----- -(1) -void *sycl::malloc(size_t num_bytes, - const sycl::device& dev, - const sycl::context& ctxt, - usm::alloc kind); -(2) -template -T *sycl::malloc(size_t count, - const sycl::device& dev, - const sycl::context& ctxt, - usm::alloc kind); ----- - -Parameters:: - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::device& dev` - the SYCL device to allocate on (if applicable) - * `const sycl::context& ctxt` - the SYCL `context` to which `device` belongs - * `usm::alloc kind` - the type of allocation to perform -Return value:: Returns a pointer to the newly allocated `kind` memory on the specified `device` on success. If `kind` is `alloc::host`, `dev` is ignored. Memory allocated by `sycl::malloc` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -[source,cpp] ----- -(1) -void *sycl::malloc(size_t num_bytes, - const sycl::queue& q, - usm::alloc kind); -(2) -template -T *sycl::malloc(size_t count, - const sycl::queue& q, - usm::alloc kind); ----- - -Parameters:: - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::queue& q` - the SYCL `q` that provides the `device` (if applicable) and `context` to allocate against - * `usm::alloc kind` - the type of allocation to perform -Return value:: Returns a pointer to the newly allocated `kind` memory on success. Memory allocated by `sycl::malloc` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. 
- -===== aligned_alloc -[source,cpp] ----- -(1) -void *sycl::aligned_alloc(size_t alignment, - size_t num_bytes, - const sycl::device& dev, - const sycl::context& ctxt, - usm::alloc kind); -(2) -template -T* sycl::aligned_alloc(size_t alignment, - size_t count, - const sycl::device& dev, - const sycl::context& ctxt, - usm::alloc kind); ----- - -Parameters:: - * `size_t alignment` - specifies the byte alignment. Must be a valid alignment supported by the implementation. - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::device& dev` - the SYCL device to allocate on (if applicable) - * `const sycl::context& ctxt` - the SYCL `context` to which `device` belongs - * `usm::alloc kind` - the type of allocation to perform -Return value:: Returns a pointer to the newly allocated `kind` memory on the specified `device` on success. If `kind` is `alloc::host`, `dev` is ignored. Memory allocated by `sycl::aligned_alloc` must be deallocated with `sycl::free` to avoid memory leaks. On failure, returns `nullptr`. - -[source,cpp] ----- -(1) -void *sycl::aligned_alloc(size_t alignment, - size_t num_bytes, - const sycl::queue& q, - usm::alloc kind); -(2) -template -T* sycl::aligned_alloc(size_t alignment, - size_t count, - const sycl::queue& q, - usm::alloc kind); ----- - -Parameters:: - * `size_t alignment` - specifies the byte alignment. Must be a valid alignment supported by the implementation. - * (1) `size_t num_bytes` - number of bytes to allocate - * (2) `size_t count` - number of elements of type `T` to allocate - * `const sycl::queue& q` - the SYCL `q` that provides the `device` (if applicable) and `context` to allocate against. - * `usm::alloc kind` - the type of allocation to perform -Return value:: Returns a pointer to the newly allocated `kind` memory on success. Memory allocated by `sycl::aligned_alloc` must be deallocated with `sycl::free` to avoid memory leaks. 
On failure, returns `nullptr`. - -===== free -[source,cpp] ----- -void sycl::free(void* ptr, sycl::context& context); ----- -Parameters:: - * `void* ptr` - pointer to the memory to deallocate. Must have been allocated by a SYCL `malloc` or `aligned_alloc` function. - * `const sycl::context& ctxt` - the SYCL `context` in which `ptr` was allocated -Return value:: none - -[source,cpp] ----- -void sycl::free(void* ptr, sycl::queue& q); ----- -Parameters:: - * `void* ptr` - pointer to the memory to deallocate. Must have been allocated by a SYCL `malloc` or `aligned_alloc` function. - * `const sycl::queue& q` - the SYCL `queue` that provides the `context` in which `ptr` was allocated -Return value:: none - -''' -=== Unified Shared Memory Information and Descriptors -==== Pointer Queries -===== get_pointer_type -[source,cpp] ----- -usm::alloc get_pointer_type(const void *ptr, const context &ctxt); ----- -Parameters:: - * `const void* ptr` - the pointer to query. - * `const sycl::context& ctxt` - the SYCL `context` to which the USM allocation belongs -Return value:: Returns the USM allocation type for `ptr` if `ptr` falls inside a valid USM allocation. If `ctxt` is a host `context`, returns `usm::alloc::host`. Returns `usm::alloc::unknown` if `ptr` is not a valid USM allocation. - -===== get_pointer_device -[source,cpp] ----- -sycl::device get_pointer_device(const void *ptr, const context &ctxt); ----- -Parameters:: - * `const void* ptr` - the pointer to query - * `const sycl::context& ctxt` - the SYCL `context` to which the USM allocation belongs - Return value:: Returns the `device` associated with the USM allocation. If `ctxt` is a host `context`, returns the host `device` in `ctxt`. If `ptr` is an allocation of type `usm::alloc::host`, returns the first device in `ctxt`. Throws an error if `ptr` is not a valid USM allocation. 
- -==== Device Information Descriptors -[cols="^25,^15,60",options="header"] -.Unified Shared Memory Device Information Descriptors -|=== -|Device Descriptor -|Type -|Description - -|`info::device::usm_device_allocations` -|`bool` -|Returns `true` if this device supports `device` allocations as described in Explicit USM. - -|`info::device::usm_host_allocations` -|`bool` -|Returns `true` if this device can access `host` allocations. - -|`info::device::usm_shared_allocations` -|`bool` -|Returns `true` if this device supports `shared` allocations as described in Restricted USM and Concurrent USM. The device may support Restricted USM, Concurrent USM, or both. - -|`info::device::usm_restricted_shared_allocations` -|`bool` -|Returns `true` if this device supports `shared` allocations as governed by the restrictions described in Restricted USM on the device. This property requires that property `usm_shared_allocations` returns `true` for this device. - - -|`info::device::usm_system_allocator` -|`bool` -|Returns `true` if the system allocator may be used instead of SYCL USM allocation mechanisms for `shared` allocations on this device as described in System USM. - -|=== - -== SYCL Scheduling -SYCL 1.2.1 defines an execution model based on tasks submitted to Out-of-Order queues. Dependences between these tasks are constructed from the data they read and write. The data usage of a task is conveyed to the runtime by constructing accessors on buffer objects that specify their intent. Pointers obtained from using explicit memory management interfaces in SYCL cannot create accessors, so dependence graphs cannot be constructed in the same fashion. New methods are required to specify dependences between tasks. - -=== DAGs without accessors -Unified Shared Memory changes how the SYCL runtime manages data movement. 
Since the runtime might no longer be responsible for orchestrating data movement, it makes sense to enable a way to build dependence graphs based on ordering computations rather than accesses to data inside them. Conveniently, a SYCL `queue` already returns an `event` upon calls to `submit`. These events can be used by the programmer to wait for the submitted task to complete. - -.Example -[source,cpp] ----- -queue q; -auto dev = q.get_device(); -auto ctxt = q.get_context(); -float* a = static_cast(malloc_shared(10*sizeof(float), dev, ctxt)); -float* b = static_cast(malloc_shared(10*sizeof(float), dev, ctxt)); -float* c = static_cast(malloc_shared(10*sizeof(float), dev, ctxt)); - -auto e = q.submit([&](handler& cgh) { - cgh.parallel_for(range<1> {10}, [=](id<1> ID) { - size_t i = ID[0]; - c[i] = a[i] + b[i]; - }); -}); -e.wait(); ----- - -=== Coarse Grain DAGs with cgh.depends_on -While SYCL already defines the capability to wait on specific tasks, programmers should still be able to easily define relationships between tasks. - -[source,cpp] ----- -class handler { - ... - public: - ... - void depends_on(event e); - void depends_on(const vector_class &e); -}; ----- - -Parameters:: `e` - event or vector of events representing task(s) required to complete before this task may begin -Return value:: none - - +The Unified Shared Memory (USM) extension is now part of the provisional SYCL 2020 specification. +Please refer to that document for the latest definition of USM in SYCL at https://www.khronos.org/sycl/[SYCL @ Khronos]. \ No newline at end of file From cd9835838971c9b003fc93b8e5daa9908fc63758 Mon Sep 17 00:00:00 2001 From: sergei <57672082+s-kanaev@users.noreply.github.com> Date: Fri, 11 Sep 2020 01:27:19 +0300 Subject: [PATCH 393/465] [SYCL] Release notes for September'20 DPCPP implementation update (#2437) * [SYCL] September '20 release notes. Increment library ABI version. 
Signed-off-by: Sergey Kanaev --- sycl/CMakeLists.txt | 4 +- sycl/ReleaseNotes.md | 137 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 2 deletions(-) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 4574ac5d0c196..f1246e4aaecb9 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -13,9 +13,9 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") include(AddSYCLExecutable) set(SYCL_MAJOR_VERSION 3) -set(SYCL_MINOR_VERSION 0) +set(SYCL_MINOR_VERSION 1) set(SYCL_PATCH_VERSION 0) -set(SYCL_DEV_ABI_VERSION 2) +set(SYCL_DEV_ABI_VERSION 0) if (SYCL_ADD_DEV_VERSION_POSTFIX) set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}") endif() diff --git a/sycl/ReleaseNotes.md b/sycl/ReleaseNotes.md index b7d19755556db..3428d543b4fd9 100644 --- a/sycl/ReleaseNotes.md +++ b/sycl/ReleaseNotes.md @@ -1,3 +1,140 @@ +# September'20 release notes + +Release notes for commit range 5976ff0..1fc0e4f + +## New features + +## Improvements +### SYCL Compiler + - Assigned the source location of the kernel caller function to the artificial + initialization code generated in the kernel body. 
It enables profiling tools + to meaningfully attribute the initialization code [6744364] + - Provided compile-time warning if size of kernel arguments exceeds 2KiB in + GPU AOT mode [e00ab74] + - Changed default SYCL standard to SYCL-2020 [67acf81] + - Removed deprecated `[[cl::intel_reqd_sub_group_size(N)]]` attribute + [9dda36f] + - Enable USM address spaces by default for the FPGA hardware [7896819] + - Assume SYCL device functions are convergent [047e2ec] + - Added Dead Argument Elimination optimization [b0d98dc] [f53ede9] + - Simplified the error checking of arrays by only visiting once [c709986] + - Stop emitting kernel arguments metadata [f658918] + - Enabled `-f[no-]sycl-early-optimizations` on Windows [e1e3658] + - Mutable kernel functions are now explicitly forbidden in SYCL 2020 + [1dbc358] + - Moved hardware targeted extensions to `INTEL` namespace [3084982] + - Added support for union types as kernel parameters [5adfd79] + - Renamed `-fsycl-std-optimizations` to `-fsycl-early-optimizations` [077a507] + - Added support for `-f[no-]sycl-id-queries-fit-in-int`. 
Enabling this will + make compiler define `_SYCL_ID_QUERIES_FIT_IN_INT_` macro which will signal + runtime to emit `__builtin_assume()` for execution range less than `INT_MAX` + limitation [3e4da3c] + - Enabled template trail for kernel diagnostics [c767edc] + - Disabled createIndVarSimplifyPass for SPIR target in SYCL mode [76ffef7] + - Run Dead Argument Elimination when LLVM optimizations are applied as well + [cf10351] + +### SYCL Library + - Eliminated circular dependency between `event` and `queue` classes [31843cc] + - Added `ONEAPI::filter_selector` [174fd168f18] + - Added CPU-agnostic code path to the host device runtime (validated on + AArch64 systems) [2f632f8] + - Added support for `bool2`, `bool3`, `bool4`, `bool8`, `bool16` [4dfd500] + - Allowed for creating lots of host accessors [b206293] + - Improved execution graph traversal [f2eaa23] + - Improved `SYCL_PI_TRACE` [4d468f1] + - Added implementation for `SYCL_INTEL_free_function_queries` [b6d7792] + - Allowed for building program for multiple devices within single context + (esp. 
for FPGA devices) [2f64227] + - Cache devices and platforms [d392b51] + - Reuse devices and platforms in Level Zero PI plugin [43ba606] + - Added group algorithms for MUL/OR/XOR/AND operations [96da39e] + - Moved general language extensions to `ONEAPI` namespace [a73369d] + - Added CMake option `SYCL_DISABLE_STL_ASSERTIONS` to disable assertions + [ec2ec99] + - Implemented USM fill operation as defined in SYCL-2020 provisional [4993646] + - Added runtime support for device code argument elimination [63ac3d3] + - Improved implementation of stream class when used in FPGA device code + [13e8dae] + - Improved error reporting in Level Zero plugin [257658c] + - Improved kernel demangling in graph printing [62192a6] + - Improved error handling in `parallel_for` [7c73c11] + - Fixed segfault in interop constructors of context, device, platform classes + [c4c3494] + +### Documentation + - Added documentation for [`SPV_INTEL_usm_storage_classes`](doc/extensions/SPIRV/SPV_INTEL_usm_storage_classes.asciidoc) + and [SYCL_INTEL_usm_address_spaces](doc/extensions/USMAddressSpaces/usm_address_spaces.asciidoc) [781fbfc] + - Fixed SPIR-V format name spelling [6e9bf3b] + - Added extension [LocalMemory](doc/extensions/LocalMemory/SYCL_INTEL_local_memory.asciidoc) draft specification [4b5308a] + - Added extension [free functions queries](doc/extensions/FreeFunctionQueries/SYCL_INTEL_free_function_queries.asciidoc) draft specification [8953bfd] + - Removed documentation for implicit attribute `buffer_location` [71a56e7] + +## Bug fixes +### SYCL Compiler + - Fixed crash when array of pointers is a kernel argument [1fc0e4f] + - Allowed for `-P -fsycl` to be used on Windows when offloading [a21d7ef] + - Fixed looking for tools (e.g. aoc, ocloc, opencl-aot) with full name on + Windows (i.e.
with `.exe` suffix) [78a86da] + - Eliminated compiler crash if invalid declaration is used as kernel argument + [0c220ca] + - Switch SPIRV debug info to legacy mode to support old OpenCL RTs [500a0b8] + - Disabled vectorizers in SYCL device code when early optimizations are + enabled [20921b1] + - Fixed crash when kernel argument is a multi-dimensional array [36f6ab6] + - Fixed `cl::sycl::INTELlsu::load()` method to return value instead of + reference [82e5323] + - Disabled "early" optimizations for Intel FPGA by default [f8902b8] + - Fixed regression on unused non-USM pointers inside struct type kernel + arguments [926eb32] + - Fixed NULL-pointer dereference in some cases [bdc2b85] + - Adjusted AUX targets with lang options [43862a3] + +### SYCL Library + - Eliminated circular dependency between command group and stream buffers, + which caused memory leaking [841e1e7] + - Added early exit from enqueue process when trying to enqueue blocked + commands. This eliminated hang in host-task when used along with multiple + buffers [bc8f0a4] + - Fixed overflow when casting global memory size in Level Zero plugin [82893b2] + - Fixed waiting for events on Level Zero [e503662] + - Added missing constructors and property methods for context, program and + sampler [30b8acc] + - Fixed printing types of variables by GDB in some cases [93e1387] + - Aligned `cl::sycl::handler::require` API with the SYCL specification + [68c275c] + - Fixed undefined behaviour in memory management intrinsics [4ff2eee] + - Fixed race condition when using sampler in parallel [34f0c10] + - Fixed race condition in `ProgramManager` class, which led to hang [e6fd911] + - Fixed thread-safety issue, which took place when using stream class [4688cb3] + - Unified usm `queue`'s `memcpy`/`memset` methods behavior for corner cases + [7b7bab6] + - Enabled USM indirect access for interoperability kernels [ebf5c4e] + +## API/ABI breakages + +## Known issues + - The format of the object files produced by the compiler
can change between + versions. The workaround is to rebuild the application. + - The SYCL library doesn't guarantee stable API/ABI, so applications compiled + with older version of the SYCL library may not work with new one. + The workaround is to rebuild the application. + [ABI policy guide](doc/ABIPolicyGuide.md) + - Using `cl::sycl::program` API to refer to a kernel defined in another + translation unit leads to undefined behavior + - Linkage errors with the following message: + `error LNK2005: "bool const std::_Is_integral" (??$_Is_integral@_N@std@@3_NB) already defined` + can happen when a SYCL application is built using MS Visual Studio 2019 + version below 16.3.0 and user specifies `-std=c++14` or `/std:c++14`. + - Employing read sampler for image accessor may result in sporadic issues with + Level Zero plugin/backend [2c50c03] + - Printing internal defines isn't supported on Windows [50628db] + - Group algorithms for MUL/AND/OR/XOR cannot be enabled for group scope due to + SPIR-V limitations, and are not enabled for sub-group scope yet as the + SPIR-V version isn't automatically raised from 1.1 to 1.3 [96da39e] + - We cannot run Dead Argument Elimination for ESIMD since the pointers to SPIR + kernel functions are saved in `!genx.kernels metadata` [cf10351] + # August'20 release notes Release notes for the commit range 75b3dc2..5976ff0 From f253851f7ab67b5704f298dd8ec7695a11b64706 Mon Sep 17 00:00:00 2001 From: mdtoguchi <47896532+mdtoguchi@users.noreply.github.com> Date: Fri, 11 Sep 2020 01:56:20 -0700 Subject: [PATCH 394/465] [Driver][SYCL][FPGA] Enable dependency file usage from static archives (#2443) When performing compilations for FPGA, we want to be sure to take advantage of dependency information that could be part of the fat static archives. 
--- clang/include/clang/Driver/Types.def | 1 + clang/lib/Driver/Compilation.cpp | 3 +- clang/lib/Driver/Driver.cpp | 47 +++++++++++++++---- clang/lib/Driver/ToolChains/Clang.cpp | 9 ++-- clang/lib/Driver/ToolChains/SYCL.cpp | 5 +- .../Driver/sycl-intelfpga-static-lib-win.cpp | 33 +++++++++++++ .../test/Driver/sycl-intelfpga-static-lib.cpp | 33 +++++++++++++ clang/test/Driver/sycl-offload-intelfpga.cpp | 24 ++++++---- 8 files changed, 130 insertions(+), 25 deletions(-) create mode 100644 clang/test/Driver/sycl-intelfpga-static-lib-win.cpp create mode 100644 clang/test/Driver/sycl-intelfpga-static-lib.cpp diff --git a/clang/include/clang/Driver/Types.def b/clang/include/clang/Driver/Types.def index 7b003aab3eb33..8e5b2afdcb976 100644 --- a/clang/include/clang/Driver/Types.def +++ b/clang/include/clang/Driver/Types.def @@ -111,4 +111,5 @@ TYPE("fpga_aocx", FPGA_AOCX, INVALID, "aocx", phases TYPE("fpga_aocr", FPGA_AOCR, INVALID, "aocr", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("fpga_aoco", FPGA_AOCO, INVALID, "aoco", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("fpga_dependencies", FPGA_Dependencies, INVALID, "d", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("fpga_dependencies_list", FPGA_Dependencies_List, INVALID, "txt", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("none", Nothing, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp index c24a6f3765f26..46a82dce89c8a 100644 --- a/clang/lib/Driver/Compilation.cpp +++ b/clang/lib/Driver/Compilation.cpp @@ -150,7 +150,8 @@ bool Compilation::CleanupFileList(const TempFileList &Files, // Temporary file lists contain files that need to be cleaned. 
The // file containing the information is also removed if (File.second == types::TY_Tempfilelist || - File.second == types::TY_Tempfiletable) { + File.second == types::TY_Tempfiletable || + File.second == types::TY_FPGA_Dependencies_List) { // These are temporary files and need to be removed. bool IsTable = File.second == types::TY_Tempfiletable; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 5681a8beb363a..8ade755e5bd80 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3613,6 +3613,9 @@ class OffloadingActionBuilder final { /// List of objects to extract FPGA dependency info from ActionList FPGAObjectInputs; + /// List of static archives to extract FPGA dependency info from + ActionList FPGAArchiveInputs; + /// List of CUDA architectures to use in this compilation with NVPTX targets. SmallVector GpuArchList; @@ -4061,15 +4064,19 @@ class OffloadingActionBuilder final { // triple calls for it (provided a valid subarch). ActionList BEInputs; BEInputs.push_back(BuildCodeAction); - for (Action *A : FPGAObjectInputs) { - // Send any known objects through the unbundler to grab the - // dependency file associated. + auto unbundleAdd = [&](Action *A, types::ID T) { ActionList AL; AL.push_back(A); - Action *UnbundleAction = C.MakeAction( - AL, types::TY_FPGA_Dependencies); + Action *UnbundleAction = + C.MakeAction(AL, T); BEInputs.push_back(UnbundleAction); - } + }; + // Send any known objects/archives through the unbundler to grab the + // dependency file associated. 
+ for (Action *A : FPGAObjectInputs) + unbundleAdd(A, types::TY_FPGA_Dependencies); + for (Action *A : FPGAArchiveInputs) + unbundleAdd(A, types::TY_FPGA_Dependencies_List); for (const auto &A : DeviceLibObjects) BEInputs.push_back(A); BuildCodeAction = @@ -4194,6 +4201,7 @@ class OffloadingActionBuilder final { Arg *SYCLAddTargets = Args.getLastArg(options::OPT_fsycl_add_targets_EQ); bool HasValidSYCLRuntime = C.getInputArgs().hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false); + bool SYCLfpgaTriple = false; if (SYCLTargets || SYCLAddTargets) { if (SYCLTargets) { llvm::StringMap FoundNormalizedTriples; @@ -4211,6 +4219,8 @@ class OffloadingActionBuilder final { FoundNormalizedTriples[NormalizedName] = Val; SYCLTripleList.push_back(TT); + if (TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga) + SYCLfpgaTriple = true; } } if (SYCLAddTargets) { @@ -4234,6 +4244,8 @@ class OffloadingActionBuilder final { const char *SYCLTargetArch = SYCLfpga ? "spir64_fpga" : "spir64"; SYCLTripleList.push_back( C.getDriver().MakeSYCLDeviceTriple(SYCLTargetArch)); + if (SYCLfpga) + SYCLfpgaTriple = true; } // Set the FPGA output type based on command line (-fsycl-link). @@ -4241,6 +4253,21 @@ class OffloadingActionBuilder final { FPGAOutType = (A->getValue() == StringRef("early")) ? 
types::TY_FPGA_AOCR : types::TY_FPGA_AOCX; + // Populate FPGA static archives that could contain dep files to be + // incorporated into the aoc compilation + if (SYCLfpgaTriple) { + SmallVector LinkArgs(getLinkerArgs(C, Args)); + for (const StringRef &LA : LinkArgs) { + if (isStaticArchiveFile(LA) && hasOffloadSections(C, LA, Args)) { + const llvm::opt::OptTable &Opts = C.getDriver().getOpts(); + Arg *InputArg = MakeInputArg(Args, Opts, Args.MakeArgString(LA)); + Action *Current = + C.MakeAction(*InputArg, types::TY_Archive); + FPGAArchiveInputs.push_back(Current); + } + } + } + DeviceLinkerInputs.resize(ToolChains.size()); return initializeGpuArchMap(); } @@ -5976,12 +6003,14 @@ InputInfo Driver::BuildJobsForActionNoCache( // Do a check for a dependency file unbundle for FPGA. This is out of line // from a regular unbundle, so just create and return the name of the // unbundled file. - if (JA->getType() == types::TY_FPGA_Dependencies) { + if (JA->getType() == types::TY_FPGA_Dependencies || + JA->getType() == types::TY_FPGA_Dependencies_List) { + std::string Ext(types::getTypeTempSuffix(JA->getType())); std::string TmpFileName = - C.getDriver().GetTemporaryPath(llvm::sys::path::stem(BaseInput), "d"); + C.getDriver().GetTemporaryPath(llvm::sys::path::stem(BaseInput), Ext); const char *TmpFile = C.addTempFile(C.getArgs().MakeArgString(TmpFileName)); - Result = InputInfo(types::TY_FPGA_Dependencies, TmpFile, TmpFile); + Result = InputInfo(JA->getType(), TmpFile, TmpFile); UnbundlingResults.push_back(Result); } else { // Now that we have all the results generated, select the one that should diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 09d0d6eb64d21..0e90a1bf89f16 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7401,7 +7401,8 @@ void OffloadBundler::ConstructJobMultipleOutputs( bool IsMSVCEnv = C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); types::ID 
InputType(Input.getType()); - bool IsFPGADepUnbundle = (JA.getType() == types::TY_FPGA_Dependencies); + bool IsFPGADepUnbundle = JA.getType() == types::TY_FPGA_Dependencies; + bool IsFPGADepLibUnbundle = JA.getType() == types::TY_FPGA_Dependencies_List; bool IsArchiveUnbundle = (!IsMSVCEnv && C.getDriver().getOffloadStaticLibSeen() && (types::isArchive(InputType) || InputType == types::TY_Object)); @@ -7417,7 +7418,7 @@ void OffloadBundler::ConstructJobMultipleOutputs( else TypeArg = "aoo"; } - if (InputType == types::TY_FPGA_AOCO || + if (InputType == types::TY_FPGA_AOCO || IsFPGADepLibUnbundle || (IsMSVCEnv && types::isArchive(InputType))) TypeArg = "aoo"; if (IsFPGADepUnbundle) @@ -7476,7 +7477,7 @@ void OffloadBundler::ConstructJobMultipleOutputs( Triples += Dep.DependentBoundArch; } } - if (IsFPGADepUnbundle) { + if (IsFPGADepUnbundle || IsFPGADepLibUnbundle) { // TODO - We are currently using the target triple inputs to slot a location // of the dependency information into the bundle. It would be good to // separate this out to an explicit option in the bundler for the dependency @@ -7497,7 +7498,7 @@ void OffloadBundler::ConstructJobMultipleOutputs( // When dealing with -fintelfpga, there is an additional unbundle step // that occurs for the dependency file. In that case, do not use the // dependent information, but just the output file. - if (IsFPGADepUnbundle) + if (IsFPGADepUnbundle || IsFPGADepLibUnbundle) UB += Outputs[0].getFilename(); else { for (unsigned I = 0; I < Outputs.size(); ++I) { diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index b61588399b9ba..9b97c004fd2ab 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -233,7 +233,8 @@ void SYCL::fpga::BackendCompiler::ConstructJob(Compilation &C, // Add any FPGA library lists. These come in as special tempfile lists. 
CmdArgs.push_back(Args.MakeArgString(Twine("-library-list=") + Filename)); - else if (II.getType() == types::TY_FPGA_Dependencies) + else if (II.getType() == types::TY_FPGA_Dependencies || + II.getType() == types::TY_FPGA_Dependencies_List) FPGADepFiles.push_back(II); else CmdArgs.push_back(C.getArgs().MakeArgString(Filename)); @@ -287,6 +288,8 @@ void SYCL::fpga::BackendCompiler::ConstructJob(Compilation &C, for (unsigned I = 0; I < FPGADepFiles.size(); ++I) { if (I) DepOpt += ','; + if (FPGADepFiles[I].getType() == types::TY_FPGA_Dependencies_List) + DepOpt += "@"; DepOpt += FPGADepFiles[I].getFilename(); } CmdArgs.push_back(C.getArgs().MakeArgString(DepOpt)); diff --git a/clang/test/Driver/sycl-intelfpga-static-lib-win.cpp b/clang/test/Driver/sycl-intelfpga-static-lib-win.cpp new file mode 100644 index 0000000000000..2f61c8d9567d4 --- /dev/null +++ b/clang/test/Driver/sycl-intelfpga-static-lib-win.cpp @@ -0,0 +1,33 @@ +/// +/// tests specific to -fintelfpga -fsycl w/ static libs +/// +// REQUIRES: clang-driver +// REQUIRES: system-windows + +// make dummy archive +// Build a fat static lib that will be used for all tests +// RUN: echo "void foo(void) {}" > %t1.cpp +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -fintelfpga %t1.cpp -c -o %t1_bundle.obj +// RUN: lib -out:%t.lib %t1_bundle.obj + +/// Check phases with static lib +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -fintelfpga %t.lib -ccc-print-phases 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK_PHASES %s +// CHECK_PHASES: 0: input, "[[INPUT:.+\.lib]]", object, (host-sycl) +// CHECK_PHASES: 1: linker, {0}, image, (host-sycl) +// CHECK_PHASES: 2: input, "[[INPUT]]", archive +// CHECK_PHASES: 3: clang-offload-unbundler, {2}, archive +// CHECK_PHASES: 4: linker, {3}, ir, (device-sycl) +// CHECK_PHASES: 5: sycl-post-link, {4}, ir, (device-sycl) +// CHECK_PHASES: 6: llvm-spirv, {5}, spirv, (device-sycl) +// CHECK_PHASES: 7: input, "[[INPUT]]", archive +// CHECK_PHASES: 8: 
clang-offload-unbundler, {7}, fpga_dependencies_list +// CHECK_PHASES: 9: backend-compiler, {6, 8}, fpga_aocx, (device-sycl) +// CHECK_PHASES: 10: clang-offload-wrapper, {9}, object, (device-sycl) +// CHECK_PHASES: 11: offload, "host-sycl (x86_64-pc-windows-msvc)" {1}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {10}, image + +/// Check for unbundle and use of deps in static lib +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -fintelfpga %t.lib -### 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK_UNBUNDLE %s +// CHECK_UNBUNDLE: clang-offload-bundler" "-type=aoo" "-targets=sycl-fpga_dep" "-inputs={{.*}}" "-outputs=[[DEPFILES:.+\.txt]]" "-unbundle" +// CHECK_UNBUNDLE: aoc{{.*}} "-dep-files=@[[DEPFILES]]" diff --git a/clang/test/Driver/sycl-intelfpga-static-lib.cpp b/clang/test/Driver/sycl-intelfpga-static-lib.cpp new file mode 100644 index 0000000000000..510684ddaed7f --- /dev/null +++ b/clang/test/Driver/sycl-intelfpga-static-lib.cpp @@ -0,0 +1,33 @@ +/// +/// tests specific to -fintelfpga -fsycl w/ static libs +/// +// REQUIRES: clang-driver + +// make dummy archive +// Build a fat static lib that will be used for all tests +// RUN: echo "void foo(void) {}" > %t1.cpp +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fintelfpga -fsycl %t1.cpp -c -o %t1_bundle.o +// RUN: llvm-ar cr %t.a %t1_bundle.o + +/// Check phases with static lib +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fintelfpga %t.a -ccc-print-phases 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK_PHASES %s +// CHECK_PHASES: 0: input, "[[INPUT:.+\.a]]", object, (host-sycl) +// CHECK_PHASES: 1: linker, {0}, image, (host-sycl) +// CHECK_PHASES: 2: input, "[[INPUT]]", archive +// CHECK_PHASES: 3: partial-link, {2}, object +// CHECK_PHASES: 4: clang-offload-unbundler, {3}, object +// CHECK_PHASES: 5: linker, {4}, ir, (device-sycl) +// CHECK_PHASES: 6: sycl-post-link, {5}, ir, (device-sycl) +// CHECK_PHASES: 7: llvm-spirv, {6}, spirv, (device-sycl) +// CHECK_PHASES: 8: 
input, "[[INPUT]]", archive +// CHECK_PHASES: 9: clang-offload-unbundler, {8}, fpga_dependencies_list +// CHECK_PHASES: 10: backend-compiler, {7, 9}, fpga_aocx, (device-sycl) +// CHECK_PHASES: 11: clang-offload-wrapper, {10}, object, (device-sycl) +// CHECK_PHASES: 12: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {11}, image + +/// Check for unbundle and use of deps in static lib +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fintelfpga %t.a -### 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK_UNBUNDLE %s +// CHECK_UNBUNDLE: clang-offload-bundler" "-type=aoo" "-targets=sycl-fpga_dep" "-inputs={{.*}}" "-outputs=[[DEPFILES:.+\.txt]]" "-unbundle" +// CHECK_UNBUNDLE: aoc{{.*}} "-dep-files=@[[DEPFILES]]" diff --git a/clang/test/Driver/sycl-offload-intelfpga.cpp b/clang/test/Driver/sycl-offload-intelfpga.cpp index 71d5c2bc76051..7c76b19db1eb7 100644 --- a/clang/test/Driver/sycl-offload-intelfpga.cpp +++ b/clang/test/Driver/sycl-offload-intelfpga.cpp @@ -368,11 +368,13 @@ // CHK-FPGA-AOCO-PHASES: 15: linker, {11, 14}, ir, (device-sycl) // CHK-FPGA-AOCO-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl) // CHK-FPGA-AOCO-PHASES: 17: llvm-spirv, {16}, spirv, (device-sycl) -// CHK-FPGA-AOCO-PHASES: 18: input, "[[INPUTA]]", fpga_aoco -// CHK-FPGA-AOCO-PHASES: 19: clang-offload-unbundler, {18}, fpga_aoco -// CHK-FPGA-AOCO-PHASES: 20: backend-compiler, {17, 19}, fpga_aocx, (device-sycl) -// CHK-FPGA-AOCO-PHASES: 21: clang-offload-wrapper, {20}, object, (device-sycl) -// CHK-FPGA-AOCO-PHASES: 22: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {21}, image +// CHK-FPGA-AOCO-PHASES: 18: input, "[[INPUTA]]", archive +// CHK-FPGA-AOCO-PHASES: 19: clang-offload-unbundler, {18}, fpga_dependencies_list +// CHK-FPGA-AOCO-PHASES: 20: input, "[[INPUTA]]", fpga_aoco +// CHK-FPGA-AOCO-PHASES: 21: clang-offload-unbundler, {20}, fpga_aoco +// 
CHK-FPGA-AOCO-PHASES: 22: backend-compiler, {17, 19, 21}, fpga_aocx, (device-sycl) +// CHK-FPGA-AOCO-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl) +// CHK-FPGA-AOCO-PHASES: 24: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {23}, image /// FPGA AOCO Windows phases check // RUN: %clang_cl -fsycl -fintelfpga -foffload-static-lib=%t_aoco_cl.a %s -### -ccc-print-phases 2>&1 \ @@ -394,11 +396,13 @@ // CHK-FPGA-AOCO-PHASES-WIN: 14: linker, {11, 13}, ir, (device-sycl) // CHK-FPGA-AOCO-PHASES-WIN: 15: sycl-post-link, {14}, ir, (device-sycl) // CHK-FPGA-AOCO-PHASES-WIN: 16: llvm-spirv, {15}, spirv, (device-sycl) -// CHK-FPGA-AOCO-PHASES-WIN: 17: input, "[[INPUTA]]", fpga_aoco -// CHK-FPGA-AOCO-PHASES-WIN: 18: clang-offload-unbundler, {17}, fpga_aoco -// CHK-FPGA-AOCO-PHASES-WIN: 19: backend-compiler, {16, 18}, fpga_aocx, (device-sycl) -// CHK-FPGA-AOCO-PHASES-WIN: 20: clang-offload-wrapper, {19}, object, (device-sycl) -// CHK-FPGA-AOCO-PHASES-WIN: 21: offload, "host-sycl (x86_64-pc-windows-msvc)" {10}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {20}, image +// CHK-FPGA-AOCO-PHASES-WIN: 17: input, "[[INPUTA]]", archive +// CHK-FPGA-AOCO-PHASES-WIN: 18: clang-offload-unbundler, {17}, fpga_dependencies_list +// CHK-FPGA-AOCO-PHASES-WIN: 19: input, "[[INPUTA]]", fpga_aoco +// CHK-FPGA-AOCO-PHASES-WIN: 20: clang-offload-unbundler, {19}, fpga_aoco +// CHK-FPGA-AOCO-PHASES-WIN: 21: backend-compiler, {16, 18, 20}, fpga_aocx, (device-sycl) +// CHK-FPGA-AOCO-PHASES-WIN: 22: clang-offload-wrapper, {21}, object, (device-sycl) +// CHK-FPGA-AOCO-PHASES-WIN: 23: offload, "host-sycl (x86_64-pc-windows-msvc)" {10}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {22}, image /// aoco test, checking tools // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -foffload-static-lib=%t_aoco.a -### %s 2>&1 \ From 998c97fec8ced4d9bcee07d666560f56fdc77d2a Mon Sep 17 00:00:00 2001 From: 
Alexander Batashev Date: Fri, 11 Sep 2020 17:54:54 +0300 Subject: [PATCH 395/465] [SYCL][FPGA] Align clang with new spec of accessor_property_list (#2447) Signed-off-by: Mikhail Lychkov --- .../clang/Basic/DiagnosticSemaKinds.td | 7 +- clang/lib/Sema/SemaSYCL.cpp | 70 +++++++++++-------- clang/test/CodeGenSYCL/Inputs/sycl.hpp | 39 ++++++----- .../test/CodeGenSYCL/accessor_inheritance.cpp | 6 +- clang/test/CodeGenSYCL/buffer_location.cpp | 12 ++-- clang/test/CodeGenSYCL/integration_header.cpp | 18 ++--- .../CodeGenSYCL/kernel-param-acc-array-ih.cpp | 2 +- .../kernel-param-member-acc-array-ih.cpp | 2 +- .../test/CodeGenSYCL/struct_kernel_param.cpp | 2 +- clang/test/SemaSYCL/Inputs/sycl.hpp | 16 +++-- clang/test/SemaSYCL/accessor_inheritance.cpp | 4 +- clang/test/SemaSYCL/basic-kernel-wrapper.cpp | 2 +- clang/test/SemaSYCL/buffer_location.cpp | 40 ++++++----- clang/test/SemaSYCL/wrapped-accessor.cpp | 8 +-- 14 files changed, 125 insertions(+), 103 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5ce722fd67e12..87c6147b3f0ac 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11036,9 +11036,10 @@ def err_sycl_compiletime_property_duplication : Error< def err_sycl_invalid_property_list_param_number : Error< "%0 must have exactly one template parameter">; def err_sycl_invalid_accessor_property_template_param : Error< - "Fifth template parameter of the accessor must be of a property_list type">; -def err_sycl_invalid_property_list_template_param : Error< - "%select{property_list|property_list pack argument|buffer_location}0 " + "sixth template parameter of the accessor must be of accessor_property_list " + "type">; +def err_sycl_invalid_accessor_property_list_template_param : Error< + "%select{accessor_property_list|accessor_property_list pack argument|buffer_location}0 " "template parameter must be a " "%select{parameter 
pack|type|non-negative integer}1">; def warn_sycl_pass_by_value_deprecated diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 8b8cf8319e93a..211389e24f73b 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -82,8 +82,8 @@ class Util { static bool isSyclHalfType(const QualType &Ty); /// Checks whether given clang type is a full specialization of the SYCL - /// property_list class. - static bool isPropertyListType(const QualType &Ty); + /// accessor_property_list class. + static bool isAccessorPropertyListType(const QualType &Ty); /// Checks whether given clang type is a full specialization of the SYCL /// buffer_location class. @@ -1194,29 +1194,31 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler { return; } QualType PropListTy = PropList.getAsType(); - if (!Util::isPropertyListType(PropListTy)) { + if (!Util::isAccessorPropertyListType(PropListTy)) { SemaRef.Diag(Loc, diag::err_sycl_invalid_accessor_property_template_param); return; } - const auto *PropListDecl = + const auto *AccPropListDecl = cast(PropListTy->getAsRecordDecl()); - if (PropListDecl->getTemplateArgs().size() != 1) { + if (AccPropListDecl->getTemplateArgs().size() != 1) { SemaRef.Diag(Loc, diag::err_sycl_invalid_property_list_param_number) - << "property_list"; + << "accessor_property_list"; return; } - const auto TemplArg = PropListDecl->getTemplateArgs()[0]; + const auto TemplArg = AccPropListDecl->getTemplateArgs()[0]; if (TemplArg.getKind() != TemplateArgument::ArgKind::Pack) { - SemaRef.Diag(Loc, diag::err_sycl_invalid_property_list_template_param) - << /*property_list*/ 0 << /*parameter pack*/ 0; + SemaRef.Diag(Loc, + diag::err_sycl_invalid_accessor_property_list_template_param) + << /*accessor_property_list*/ 0 << /*parameter pack*/ 0; return; } for (TemplateArgument::pack_iterator Prop = TemplArg.pack_begin(); Prop != TemplArg.pack_end(); ++Prop) { if (Prop->getKind() != TemplateArgument::ArgKind::Type) { - SemaRef.Diag(Loc, 
diag::err_sycl_invalid_property_list_template_param) - << /*property_list pack argument*/ 1 << /*type*/ 1; + SemaRef.Diag( + Loc, diag::err_sycl_invalid_accessor_property_list_template_param) + << /*accessor_property_list pack argument*/ 1 << /*type*/ 1; return; } QualType PropTy = Prop->getAsType(); @@ -1235,13 +1237,15 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler { } const auto BufferLoc = PropDecl->getTemplateArgs()[0]; if (BufferLoc.getKind() != TemplateArgument::ArgKind::Integral) { - SemaRef.Diag(Loc, diag::err_sycl_invalid_property_list_template_param) + SemaRef.Diag(Loc, + diag::err_sycl_invalid_accessor_property_list_template_param) << /*buffer_location*/ 2 << /*non-negative integer*/ 2; return; } int LocationID = static_cast(BufferLoc.getAsIntegral().getExtValue()); if (LocationID < 0) { - SemaRef.Diag(Loc, diag::err_sycl_invalid_property_list_template_param) + SemaRef.Diag(Loc, + diag::err_sycl_invalid_accessor_property_list_template_param) << /*buffer_location*/ 2 << /*non-negative integer*/ 2; return; } @@ -1414,19 +1418,18 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { } // Handle accessor properties. If any properties were found in - // the property_list - add the appropriate attributes to ParmVarDecl. + // the accessor_property_list - add the appropriate attributes to ParmVarDecl. 
void handleAccessorPropertyList(ParmVarDecl *Param, const CXXRecordDecl *RecordDecl, SourceLocation Loc) { const auto *AccTy = cast(RecordDecl); - // TODO: when SYCL headers' part is ready - replace this 'if' with an error if (AccTy->getTemplateArgs().size() < 6) return; const auto PropList = cast(AccTy->getTemplateArgs()[5]); QualType PropListTy = PropList.getAsType(); - const auto *PropListDecl = + const auto *AccPropListDecl = cast(PropListTy->getAsRecordDecl()); - const auto TemplArg = PropListDecl->getTemplateArgs()[0]; + const auto TemplArg = AccPropListDecl->getTemplateArgs()[0]; // Move through TemplateArgs list of a property list and search for // properties. If found - apply the appropriate attribute to ParmVarDecl. for (TemplateArgument::pack_iterator Prop = TemplArg.pack_begin(); @@ -3455,20 +3458,17 @@ bool Util::isSyclSpecConstantType(const QualType &Ty) { return matchQualifiedTypeName(Ty, Scopes); } -bool Util::isPropertyListType(const QualType &Ty) { - return isSyclType(Ty, "property_list", true /*Tmpl*/); -} - bool Util::isSyclBufferLocationType(const QualType &Ty) { - const StringRef &Name = "buffer_location"; - std::array Scopes = { - Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"}, - Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"}, - // TODO: this doesn't belong to property namespace, instead it shall be - // in its own namespace. 
Change it, when the actual implementation in SYCL - // headers is ready - Util::DeclContextDesc{clang::Decl::Kind::Namespace, "property"}, - Util::DeclContextDesc{Decl::Kind::ClassTemplateSpecialization, Name}}; + const StringRef &PropertyName = "buffer_location"; + const StringRef &InstanceName = "instance"; + std::array Scopes = { + Util::DeclContextDesc{Decl::Kind::Namespace, "cl"}, + Util::DeclContextDesc{Decl::Kind::Namespace, "sycl"}, + Util::DeclContextDesc{Decl::Kind::Namespace, "INTEL"}, + Util::DeclContextDesc{Decl::Kind::Namespace, "property"}, + Util::DeclContextDesc{Decl::Kind::CXXRecord, PropertyName}, + Util::DeclContextDesc{Decl::Kind::ClassTemplateSpecialization, + InstanceName}}; return matchQualifiedTypeName(Ty, Scopes); } @@ -3482,6 +3482,16 @@ bool Util::isSyclType(const QualType &Ty, StringRef Name, bool Tmpl) { return matchQualifiedTypeName(Ty, Scopes); } +bool Util::isAccessorPropertyListType(const QualType &Ty) { + const StringRef &Name = "accessor_property_list"; + std::array Scopes = { + Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"}, + Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"}, + Util::DeclContextDesc{clang::Decl::Kind::Namespace, "ONEAPI"}, + Util::DeclContextDesc{Decl::Kind::ClassTemplateSpecialization, Name}}; + return matchQualifiedTypeName(Ty, Scopes); +} + bool Util::matchQualifiedTypeName(const QualType &Ty, ArrayRef Scopes) { // The idea: check the declaration context chain starting from the type diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 0d77fd81dc7da..e266ae3d3bea5 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -71,19 +71,11 @@ enum prop_type { base_prop }; -// Compile time known accessor property -// TODO: this doesn't belong to property namespace, instead it shall be in its -// own namespace. 
Change it, when the actual implementation in SYCL headers is -// ready -template -class buffer_location {}; - struct property_base { virtual prop_type type() const = 0; }; } // namespace property -template class property_list { public: template @@ -102,6 +94,20 @@ class property_list { bool operator!=(const property_list &rhs) const { return false; } }; +namespace INTEL { +namespace property { +// Compile time known accessor property +struct buffer_location { + template class instance {}; +}; +} // namespace property +} // namespace INTEL + +namespace ONEAPI { +template +class accessor_property_list {}; +} // namespace ONEAPI + template struct id { template @@ -136,7 +142,7 @@ struct _ImplT { template > + typename propertyListT = ONEAPI::accessor_property_list<>> class accessor { public: @@ -150,8 +156,6 @@ class accessor { private: void __init(__attribute__((opencl_global)) dataT *Ptr, range AccessRange, range MemRange, id Offset) {} - - propertyListT prop_list; }; template @@ -339,8 +343,7 @@ const stream& operator<<(const stream &S, T&&) { } template + typename AllocatorT = int /*fake type as AllocatorT is not used*/> class buffer { public: using value_type = T; @@ -352,13 +355,13 @@ class buffer { buffer(ParamTypes... 
args) {} // fake constructor buffer(const range &bufferRange, - const property_list &propList = {}) {} + const property_list &propList = {}) {} buffer(T *hostData, const range &bufferRange, - const property_list &propList = {}) {} + const property_list &propList = {}) {} buffer(const T *hostData, const range &bufferRange, - const property_list &propList = {}) {} + const property_list &propList = {}) {} buffer(const buffer &rhs) = default; @@ -426,12 +429,12 @@ enum class image_channel_type : unsigned int { fp32 }; -template +template class image { public: image(image_channel_order Order, image_channel_type Type, const range &Range, - const property_list &PropList = {}) {} + const property_list &PropList = {}) {} /* -- common interface members -- */ diff --git a/clang/test/CodeGenSYCL/accessor_inheritance.cpp b/clang/test/CodeGenSYCL/accessor_inheritance.cpp index 47705df10dfbf..f6d93002e9407 100644 --- a/clang/test/CodeGenSYCL/accessor_inheritance.cpp +++ b/clang/test/CodeGenSYCL/accessor_inheritance.cpp @@ -67,13 +67,13 @@ int main() { // CHECK: [[ACC_FIELD:%[a-zA-Z0-9_]+]] = getelementptr inbounds %struct{{.*}}Base, %struct{{.*}}Base* [[BITCAST]], i32 0, i32 2 // CHECK: [[ACC1_AS_CAST:%[a-zA-Z0-9_]+]] = addrspacecast %"class{{.*}}cl::sycl::accessor"* [[ACC_FIELD]] to %"class{{.*}}cl::sycl::accessor" addrspace(4)* // Default constructor call -// CHECK: call spir_func void @_ZN2cl4sycl8accessorIcLi1ELNS0_6access4modeE1024ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_13property_listIJEEEEC1Ev(%"class{{.*}}cl::sycl::accessor" addrspace(4)* [[ACC1_AS_CAST]]) +// CHECK: call spir_func void @_ZN2cl4sycl8accessorIcLi1ELNS0_6access4modeE1024ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_6ONEAPI22accessor_property_listIJEEEEC1Ev(%"class{{.*}}cl::sycl::accessor" addrspace(4)* [[ACC1_AS_CAST]]) // CHECK: [[BITCAST1:%[a-zA-Z0-9_]+]] = bitcast %struct{{.*}}Captured* [[GEP]] to i8* -// CHECK: [[GEP1:%[a-zA-Z0-9_]+]] = getelementptr inbounds i8, i8* [[BITCAST1]], i64 24 +// CHECK: 
[[GEP1:%[a-zA-Z0-9_]+]] = getelementptr inbounds i8, i8* [[BITCAST1]], i64 20 // CHECK: [[BITCAST2:%[a-zA-Z0-9_]+]] = bitcast i8* [[GEP1]] to %"class{{.*}}cl::sycl::accessor"* // CHECK: [[ACC2_AS_CAST:%[a-zA-Z0-9_]+]] = addrspacecast %"class{{.*}}cl::sycl::accessor"* [[BITCAST2]] to %"class{{.*}}cl::sycl::accessor" addrspace(4)* // Default constructor call -// CHECK: call spir_func void @_ZN2cl4sycl8accessorIcLi1ELNS0_6access4modeE1024ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_13property_listIJEEEEC2Ev(%"class{{.*}}cl::sycl::accessor" addrspace(4)* [[ACC2_AS_CAST]]) +// CHECK: call spir_func void @_ZN2cl4sycl8accessorIcLi1ELNS0_6access4modeE1024ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_6ONEAPI22accessor_property_listIJEEEEC2Ev(%"class{{.*}}cl::sycl::accessor" addrspace(4)* [[ACC2_AS_CAST]]) // CHECK C field initialization // CHECK: [[FIELD_C:%[a-zA-Z0-9_]+]] = getelementptr inbounds %struct{{.*}}Captured, %struct{{.*}}Captured* [[GEP]], i32 0, i32 2 diff --git a/clang/test/CodeGenSYCL/buffer_location.cpp b/clang/test/CodeGenSYCL/buffer_location.cpp index 0cf04c824d1a1..35448a9ff589b 100644 --- a/clang/test/CodeGenSYCL/buffer_location.cpp +++ b/clang/test/CodeGenSYCL/buffer_location.cpp @@ -10,8 +10,8 @@ struct Base { cl::sycl::accessor>> + cl::sycl::ONEAPI::accessor_property_list< + cl::sycl::INTEL::property::buffer_location::instance<2>>> AccField; }; @@ -19,8 +19,8 @@ struct Captured : Base, cl::sycl::accessor>> { + cl::sycl::ONEAPI::accessor_property_list< + cl::sycl::INTEL::property::buffer_location::instance<2>>> { int C; }; @@ -29,8 +29,8 @@ int main() { cl::sycl::accessor>> + cl::sycl::ONEAPI::accessor_property_list< + cl::sycl::INTEL::property::buffer_location::instance<3>>> accessorA; cl::sycl::kernel_single_task( [=]() { diff --git a/clang/test/CodeGenSYCL/integration_header.cpp b/clang/test/CodeGenSYCL/integration_header.cpp index 146baeba4f74e..d5eca9624f3f2 100644 --- a/clang/test/CodeGenSYCL/integration_header.cpp +++ 
b/clang/test/CodeGenSYCL/integration_header.cpp @@ -31,18 +31,18 @@ // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 1, 4 }, // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 8 }, // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 12 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 6112, 28 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_sampler, 8, 48 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 6112, 24 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_sampler, 8, 40 }, // CHECK-EMPTY: // CHECK-NEXT: //--- _ZTSN16second_namespace13second_kernelIcEE // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 6112, 4 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_sampler, 8, 24 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_sampler, 8, 16 }, // CHECK-EMPTY: // CHECK-NEXT: //--- _ZTS12third_kernelILi1Ei5pointIZ4mainE1XEE // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 6112, 4 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_sampler, 8, 24 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_sampler, 8, 16 }, // CHECK-EMPTY: // CHECK-NEXT: //--- _ZTS13fourth_kernelIJN15template_arg_ns14namespaced_argILi1EEEEE // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, @@ -52,11 +52,11 @@ // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 4 }, // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 8 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 24 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 28 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 44 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 48 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 64 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 20 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 
4062, 24 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 36 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 40 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 52 }, // CHECK-EMPTY: // CHECK-NEXT: }; // diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp index 8d2fda59c68c7..ba94a207476a7 100644 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -21,7 +21,7 @@ // CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { // CHECK-NEXT: //--- _ZTSZ4mainE8kernel_A // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 16 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 12 }, // CHECK-EMPTY: // CHECK-NEXT: }; diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp index 8175b78d95c1f..f6590bd7475f3 100644 --- a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp @@ -21,7 +21,7 @@ // CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { // CHECK-NEXT: //--- _ZTSZ4mainE8kernel_C // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 16 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 12 }, // CHECK-EMPTY: // CHECK-NEXT: }; diff --git a/clang/test/CodeGenSYCL/struct_kernel_param.cpp b/clang/test/CodeGenSYCL/struct_kernel_param.cpp index 12a27dc6f2c87..e67915455539a 100644 --- a/clang/test/CodeGenSYCL/struct_kernel_param.cpp +++ b/clang/test/CodeGenSYCL/struct_kernel_param.cpp @@ -4,12 +4,12 @@ // CHECK: const kernel_param_desc_t kernel_signatures[] = { // CHECK-NEXT: //--- _ZTSZZ5test0vENK3$_0clERN2cl4sycl7handlerEE8MyKernel // CHECK-NEXT: { 
kernel_param_kind_t::kind_accessor, 4062, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 12 }, // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 16 }, // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 20 }, // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 24 }, // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 28 }, // CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 32 }, -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 36 }, // CHECK-EMPTY: // CHECK-NEXT:}; diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp index 65a77b01f165a..f2bfe5e357041 100644 --- a/clang/test/SemaSYCL/Inputs/sycl.hpp +++ b/clang/test/SemaSYCL/Inputs/sycl.hpp @@ -37,13 +37,20 @@ enum class address_space : int { }; } // namespace access +class property_list {}; + +namespace INTEL { namespace property { -template -class buffer_location {}; +struct buffer_location { + template class instance {}; +}; } // namespace property +} // namespace INTEL +namespace ONEAPI { template -class property_list {}; +class accessor_property_list {}; +} // namespace ONEAPI namespace detail { namespace half_impl { @@ -95,7 +102,7 @@ struct DeviceValueType { template > + typename propertyListT = ONEAPI::accessor_property_list<>> class accessor { public: @@ -107,7 +114,6 @@ class accessor { using PtrType = typename DeviceValueType::type *; void __init(PtrType Ptr, range AccessRange, range MemRange, id Offset) {} - propertyListT prop_list; }; template diff --git a/clang/test/SemaSYCL/accessor_inheritance.cpp b/clang/test/SemaSYCL/accessor_inheritance.cpp index 78bdddda98945..b481d34f4ba70 100644 --- a/clang/test/SemaSYCL/accessor_inheritance.cpp +++ b/clang/test/SemaSYCL/accessor_inheritance.cpp @@ -42,8 +42,8 @@ int main() { // CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_A' 'int' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' // CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_B' 
'int' -// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::accessor':'cl::sycl::accessor>' 'void () noexcept' -// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::accessor':'cl::sycl::accessor>' 'void () noexcept' +// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::accessor':'cl::sycl::accessor>' 'void () noexcept' +// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::accessor':'cl::sycl::accessor>' 'void () noexcept' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' // CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_C' 'int' diff --git a/clang/test/SemaSYCL/basic-kernel-wrapper.cpp b/clang/test/SemaSYCL/basic-kernel-wrapper.cpp index 7fe3c8255153d..800b8ce030787 100644 --- a/clang/test/SemaSYCL/basic-kernel-wrapper.cpp +++ b/clang/test/SemaSYCL/basic-kernel-wrapper.cpp @@ -43,7 +43,7 @@ int main() { // CHECK: CXXMemberCallExpr {{.*}} 'void' // CHECK-NEXT: MemberExpr {{.*}} 'void ({{.*}}PtrType, range<1>, range<1>, id<1>)' lvalue .__init -// CHECK-NEXT: MemberExpr {{.*}} 'cl::sycl::accessor':'cl::sycl::accessor>' lvalue . +// CHECK-NEXT: MemberExpr {{.*}} 'cl::sycl::accessor':'cl::sycl::accessor>' lvalue . 
// CHECK-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})' lvalue Var // CHECK-NEXT: ImplicitCastExpr {{.*}} diff --git a/clang/test/SemaSYCL/buffer_location.cpp b/clang/test/SemaSYCL/buffer_location.cpp index 2d59fc819e1b0..ac3724d32ef0f 100644 --- a/clang/test/SemaSYCL/buffer_location.cpp +++ b/clang/test/SemaSYCL/buffer_location.cpp @@ -9,22 +9,24 @@ template class another_property_list { }; +template +using buffer_location = cl::sycl::INTEL::property::buffer_location::instance; + struct Base { int A, B; cl::sycl::accessor>> + cl::sycl::ONEAPI::accessor_property_list>> AccField; }; -struct Captured : Base, - cl::sycl::accessor>> { +struct Captured + : Base, + cl::sycl::accessor>> { int C; }; @@ -35,28 +37,28 @@ int main() { cl::sycl::accessor>> + cl::sycl::ONEAPI::accessor_property_list>> // CHECK: SYCLIntelBufferLocationAttr {{.*}} Implicit 2 accessorA; cl::sycl::accessor>> + cl::sycl::ONEAPI::accessor_property_list< + another_property, + buffer_location<3>>> // CHECK: SYCLIntelBufferLocationAttr {{.*}} Implicit 3 accessorB; cl::sycl::accessor> + cl::sycl::ONEAPI::accessor_property_list< + another_property>> accessorC; #else cl::sycl::accessor>> + cl::sycl::ONEAPI::accessor_property_list>> accessorD; cl::sycl::accessor, - cl::sycl::property::buffer_location<2>>> + cl::sycl::ONEAPI::accessor_property_list< + buffer_location<1>, + buffer_location<2>>> accessorF; #endif cl::sycl::kernel_single_task( @@ -82,7 +84,7 @@ int main() { #else //expected-error@+1{{buffer_location template parameter must be a non-negative integer}} accessorD.use(); - //expected-error@+1{{Fifth template parameter of the accessor must be of a property_list type}} + //expected-error@+1{{sixth template parameter of the accessor must be of accessor_property_list type}} accessorE.use(); //expected-error@+1{{Can't apply buffer_location property twice to the same accessor}} accessorF.use(); diff --git a/clang/test/SemaSYCL/wrapped-accessor.cpp 
b/clang/test/SemaSYCL/wrapped-accessor.cpp index 42a34feb6ef47..56e977d26e47d 100644 --- a/clang/test/SemaSYCL/wrapped-accessor.cpp +++ b/clang/test/SemaSYCL/wrapped-accessor.cpp @@ -35,14 +35,14 @@ int main() { // argument // CHECK: VarDecl {{.*}}'(lambda at {{.*}}wrapped-accessor.cpp{{.*}})' // CHECK-NEXT: InitListExpr {{.*}}'(lambda at {{.*}}wrapped-accessor.cpp{{.*}})' -// CHECK-NEXT: InitListExpr {{.*}}'AccWrapper>>' -// CHECK-NEXT: CXXConstructExpr {{.*}}'cl::sycl::accessor>':'cl::sycl::accessor>' 'void () noexcept' +// CHECK-NEXT: InitListExpr {{.*}}'AccWrapper>>' +// CHECK-NEXT: CXXConstructExpr {{.*}}'cl::sycl::accessor>':'cl::sycl::accessor>' 'void () noexcept' // Check that accessor field of the wrapper object is initialized using __init method // CHECK-NEXT: CXXMemberCallExpr {{.*}} 'void' // CHECK-NEXT: MemberExpr {{.*}} 'void ({{.*}}PtrType, range<1>, range<1>, id<1>)' lvalue .__init -// CHECK-NEXT: MemberExpr {{.*}} 'cl::sycl::accessor>':'cl::sycl::accessor>' lvalue .accessor {{.*}} -// CHECK-NEXT: MemberExpr {{.*}} 'AccWrapper>>':'AccWrapper>>' lvalue . +// CHECK-NEXT: MemberExpr {{.*}} 'cl::sycl::accessor>':'cl::sycl::accessor>' lvalue .accessor {{.*}} +// CHECK-NEXT: MemberExpr {{.*}} 'AccWrapper>>':'AccWrapper>>' lvalue . // CHECK-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}wrapped-accessor.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}wrapped-accessor.cpp{{.*}})' // Parameters of the _init method From dc8a0593556f333a5a2429b5aac778f56eee22d6 Mon Sep 17 00:00:00 2001 From: mdtoguchi <47896532+mdtoguchi@users.noreply.github.com> Date: Sat, 12 Sep 2020 07:28:21 -0700 Subject: [PATCH 396/465] [Driver][SYCL][FPGA] Improve help output for aoc with -fsycl-help (#2446) aoc provides a way to emit SYCL FPGA specific options with the -help via the -sycl option. Augment the the help output behaviors to allow for an additional option that can be used with the tools. 
--- clang/lib/Driver/Driver.cpp | 19 ++++++++++++------- clang/test/Driver/sycl.c | 2 +- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 8ade755e5bd80..0c7f1cb5b4a22 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1788,20 +1788,21 @@ llvm::Triple Driver::MakeSYCLDeviceTriple(StringRef TargetArch) const { // Print the help from any of the given tools which are used for AOT // compilation for SYCL void Driver::PrintSYCLToolHelp(const Compilation &C) const { - SmallVector, 4> HelpArgs; + SmallVector, 4> + HelpArgs; // Populate the vector with the tools and help options if (Arg *A = C.getArgs().getLastArg(options::OPT_fsycl_help_EQ)) { StringRef AV(A->getValue()); llvm::Triple T; if (AV == "gen" || AV == "all") HelpArgs.push_back(std::make_tuple(MakeSYCLDeviceTriple("spir64_gen"), - "ocloc", "--help")); + "ocloc", "--help", "")); if (AV == "fpga" || AV == "all") - HelpArgs.push_back( - std::make_tuple(MakeSYCLDeviceTriple("spir64_fpga"), "aoc", "-help")); + HelpArgs.push_back(std::make_tuple(MakeSYCLDeviceTriple("spir64_fpga"), + "aoc", "-help", "-sycl")); if (AV == "x86_64" || AV == "all") HelpArgs.push_back(std::make_tuple(MakeSYCLDeviceTriple("spir64_x86_64"), - "opencl-aot", "--help")); + "opencl-aot", "--help", "")); if (HelpArgs.empty()) { C.getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << AV; @@ -1814,7 +1815,8 @@ void Driver::PrintSYCLToolHelp(const Compilation &C) const { llvm::outs() << "Emitting help information for " << std::get<1>(HA) << '\n' << "Use triple of '" << std::get<0>(HA).normalize() << "' to enable ahead of time compilation\n"; - std::vector ToolArgs = { std::get<1>(HA), std::get<2>(HA) }; + std::vector ToolArgs = {std::get<1>(HA), std::get<2>(HA), + std::get<3>(HA)}; SmallString<128> ExecPath( C.getDefaultToolChain().GetProgramPath(std::get<1>(HA).data())); auto ToolBinary = 
llvm::sys::findProgramByName(ExecPath); @@ -1824,7 +1826,10 @@ void Driver::PrintSYCLToolHelp(const Compilation &C) const { } // do not run the tools with -###. if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) { - llvm::errs() << "\"" << ExecPath << "\" \"" << ToolArgs[1] << "\"\n"; + llvm::errs() << "\"" << ExecPath << "\" \"" << ToolArgs[1] << "\""; + if (!ToolArgs[2].empty()) + llvm::errs() << " \"" << ToolArgs[2] << "\""; + llvm::errs() << "\n"; continue; } // Run the Tool. diff --git a/clang/test/Driver/sycl.c b/clang/test/Driver/sycl.c index 13c8fc0592690..33a04d091c98a 100644 --- a/clang/test/Driver/sycl.c +++ b/clang/test/Driver/sycl.c @@ -75,7 +75,7 @@ // SYCL-HELP-BADARG: unsupported argument 'foo' to option 'fsycl-help=' // SYCL-HELP-GEN: Emitting help information for ocloc // SYCL-HELP-GEN: Use triple of 'spir64_gen-unknown-unknown-sycldevice' to enable ahead of time compilation -// SYCL-HELP-FPGA-OUT: "[[DIR]]{{[/\\]+}}aoc" "-help" +// SYCL-HELP-FPGA-OUT: "[[DIR]]{{[/\\]+}}aoc" "-help" "-sycl" // SYCL-HELP-FPGA: Emitting help information for aoc // SYCL-HELP-FPGA: Use triple of 'spir64_fpga-unknown-unknown-sycldevice' to enable ahead of time compilation // SYCL-HELP-CPU: Emitting help information for opencl-aot From 52c753b2c02f25a7cfb5d7429cef913e078ac32d Mon Sep 17 00:00:00 2001 From: amochalo Date: Tue, 25 Aug 2020 16:16:37 +0300 Subject: [PATCH 397/465] Add Constrained Floating-Point Intrinsics support Add llvm.experimental.constrained.* intrinsic support, namely : fadd, fsub, fmul, fdiv, frem, fma, fmulladd, fptoui, fptosi, uitofp, sitofp, fptrunc, fpext, fcmp, fcmps. 
These intrinsics are not actually supported in SPIR-V, but just being emulated via existing functionality with loss of "constrained" semantic Signed-off-by: amochalo --- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 132 ++++++++++++++++++++ llvm-spirv/lib/SPIRV/SPIRVWriter.h | 3 + llvm-spirv/test/constrained_arifm_intr.ll | 87 +++++++++++++ llvm-spirv/test/constrained_cmp_intr.ll | 69 ++++++++++ llvm-spirv/test/constrained_convert_intr.ll | 81 ++++++++++++ 5 files changed, 372 insertions(+) create mode 100644 llvm-spirv/test/constrained_arifm_intr.ll create mode 100644 llvm-spirv/test/constrained_cmp_intr.ll create mode 100644 llvm-spirv/test/constrained_convert_intr.ll diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 7c9d6da416604..31917c6141ceb 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -1957,6 +1957,21 @@ bool LLVMToSPIRV::isKnownIntrinsic(Intrinsic::ID Id) { case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::expect: + case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fsub: + case Intrinsic::experimental_constrained_fmul: + case Intrinsic::experimental_constrained_fdiv: + case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_uitofp: + case Intrinsic::experimental_constrained_sitofp: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: + case Intrinsic::experimental_constrained_fcmp: + case Intrinsic::experimental_constrained_fcmps: + case Intrinsic::experimental_constrained_fmuladd: case Intrinsic::fmuladd: case Intrinsic::memset: case Intrinsic::memcpy: @@ -1978,6 +1993,24 @@ bool LLVMToSPIRV::isKnownIntrinsic(Intrinsic::ID Id) { } } +// Performs mapping of LLVM IR rounding mode to SPIR-V 
rounding mode +// Value *V is metadata argument of +// llvm.experimental.constrained.* intrinsics +SPIRVInstruction * +LLVMToSPIRV::applyRoundingModeConstraint(Value *V, SPIRVInstruction *I) { + StringRef RMode = + cast(cast(V)->getMetadata())->getString(); + if (RMode.endswith("tonearest")) + I->addFPRoundingMode(FPRoundingModeRTE); + else if (RMode.endswith("towardzero")) + I->addFPRoundingMode(FPRoundingModeRTZ); + else if (RMode.endswith("upward")) + I->addFPRoundingMode(FPRoundingModeRTP); + else if (RMode.endswith("downward")) + I->addFPRoundingMode(FPRoundingModeRTN); + return I; +} + SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, SPIRVBasicBlock *BB) { auto GetMemoryAccess = [](MemIntrinsic *MI) -> std::vector { @@ -2065,6 +2098,105 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, } return Value; } + case Intrinsic::experimental_constrained_fadd: { + auto BI = BM->addBinaryInst(OpFAdd, transType(II->getType()), + transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + return applyRoundingModeConstraint(II->getOperand(2), BI); + } + case Intrinsic::experimental_constrained_fsub: { + auto BI = BM->addBinaryInst(OpFSub, transType(II->getType()), + transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + return applyRoundingModeConstraint(II->getOperand(2), BI); + } + case Intrinsic::experimental_constrained_fmul: { + auto BI = BM->addBinaryInst(OpFMul, transType(II->getType()), + transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + return applyRoundingModeConstraint(II->getOperand(2), BI); + } + case Intrinsic::experimental_constrained_fdiv: { + auto BI = BM->addBinaryInst(OpFDiv, transType(II->getType()), + transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + return applyRoundingModeConstraint(II->getOperand(2), BI); + } + case Intrinsic::experimental_constrained_frem: { + auto BI = BM->addBinaryInst(OpFRem, 
transType(II->getType()), + transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + return applyRoundingModeConstraint(II->getOperand(2), BI); + } + case Intrinsic::experimental_constrained_fma: { + std::vector Args{transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), + transValue(II->getArgOperand(2), BB)}; + auto BI = BM->addExtInst(transType(II->getType()), + BM->getExtInstSetId(SPIRVEIS_OpenCL), + OpenCLLIB::Fma, Args, BB); + return applyRoundingModeConstraint(II->getOperand(3), BI); + } + case Intrinsic::experimental_constrained_fptoui: { + return BM->addUnaryInst(OpConvertFToU, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + } + case Intrinsic::experimental_constrained_fptosi: { + return BM->addUnaryInst(OpConvertFToS, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + } + case Intrinsic::experimental_constrained_uitofp: { + auto BI = BM->addUnaryInst(OpConvertUToF, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + return applyRoundingModeConstraint(II->getOperand(1), BI); + } + case Intrinsic::experimental_constrained_sitofp: { + auto BI = BM->addUnaryInst(OpConvertSToF, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + return applyRoundingModeConstraint(II->getOperand(1), BI); + } + case Intrinsic::experimental_constrained_fpext: { + return BM->addUnaryInst(OpFConvert, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + } + case Intrinsic::experimental_constrained_fptrunc: { + auto BI = BM->addUnaryInst(OpFConvert, transType(II->getType()), + transValue(II->getArgOperand(0), BB), BB); + return applyRoundingModeConstraint(II->getOperand(1), BI); + } + case Intrinsic::experimental_constrained_fcmp: + case Intrinsic::experimental_constrained_fcmps: { + auto MetaMod = cast(II->getOperand(2))->getMetadata(); + Op CmpTypeOp = StringSwitch(cast(MetaMod)->getString()) + .Case("oeq", OpFOrdEqual) + 
.Case("ogt", OpFOrdGreaterThan) + .Case("oge", OpFOrdGreaterThanEqual) + .Case("olt", OpFOrdLessThan) + .Case("ole", OpFOrdLessThanEqual) + .Case("one", OpFOrdNotEqual) + .Case("ord", OpOrdered) + .Case("ueq", OpFUnordEqual) + .Case("ugt", OpFUnordGreaterThan) + .Case("uge", OpFUnordGreaterThanEqual) + .Case("ult", OpFUnordLessThan) + .Case("ule", OpFUnordLessThanEqual) + .Case("une", OpFUnordNotEqual) + .Case("uno", OpUnordered) + .Default(OpNop); + assert(CmpTypeOp != OpNop && "Invalid condition code!"); + return BM->addCmpInst(CmpTypeOp, transType(II->getType()), + transValue(II->getOperand(0), BB), + transValue(II->getOperand(1), BB), BB); + } + case Intrinsic::experimental_constrained_fmuladd: { + SPIRVType *Ty = transType(II->getType()); + SPIRVValue *Mul = + BM->addBinaryInst(OpFMul, Ty, transValue(II->getArgOperand(0), BB), + transValue(II->getArgOperand(1), BB), BB); + auto BI = BM->addBinaryInst(OpFAdd, Ty, Mul, + transValue(II->getArgOperand(2), BB), BB); + return applyRoundingModeConstraint(II->getOperand(3), BI); + } case Intrinsic::fmuladd: { // For llvm.fmuladd.* fusion is not guaranteed. 
If a fused multiply-add // is required the corresponding llvm.fma.* intrinsic function should be diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.h b/llvm-spirv/lib/SPIRV/SPIRVWriter.h index 0dc93519b21f4..40f8b991324b6 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.h +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.h @@ -133,6 +133,9 @@ class LLVMToSPIRV : public ModulePass { FuncTransMode FuncTrans = FuncTransMode::Decl); void transGlobalIOPipeStorage(GlobalVariable *V, MDNode *IO); + static SPIRVInstruction *applyRoundingModeConstraint(Value *V, + SPIRVInstruction *I); + typedef DenseMap LLVMToSPIRVTypeMap; typedef DenseMap LLVMToSPIRVValueMap; typedef DenseMap LLVMToSPIRVMetadataMap; diff --git a/llvm-spirv/test/constrained_arifm_intr.ll b/llvm-spirv/test/constrained_arifm_intr.ll new file mode 100644 index 0000000000000..4ccce47ce893f --- /dev/null +++ b/llvm-spirv/test/constrained_arifm_intr.ll @@ -0,0 +1,87 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + + +; CHECK: Name [[ad:[0-9]+]] "add" +; CHECK: Name [[di:[0-9]+]] "div" +; CHECK: Name [[su:[0-9]+]] "sub" +; CHECK: Name [[mu:[0-9]+]] "mul" + +; CHECK-NOT: Decorate {{[0-9]+}} FPRoundingMode + +; CHECK: Decorate [[ad]] FPRoundingMode 0 +; CHECK: Decorate [[di]] FPRoundingMode 1 +; CHECK: Decorate [[su]] FPRoundingMode 2 +; CHECK: Decorate [[mu]] FPRoundingMode 3 + +; CHECK-NOT: Decorate {{[0-9]+}} FPRoundingMode + +; CHECK: FAdd {{[0-9]+}} [[ad]] +; CHECK: FDiv {{[0-9]+}} [[di]] +; CHECK: FSub {{[0-9]+}} [[su]] +; CHECK: FMul {{[0-9]+}} [[mu]] +; CHECK: FMul +; CHECK: FAdd +; CHECK: ExtInst {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} fma +; CHECK: FRem + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-linux-sycldevice" + +; Function Attrs: norecurse nounwind strictfp +define dso_local spir_kernel void @test(float %a, i32 %in, i32 
%ui) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_buffer_location !9 { +entry: + %add = tail call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #2 + %div = tail call float @llvm.experimental.constrained.fdiv.f32(float %add, float %add, metadata !"round.towardzero", metadata !"fpexcept.strict") #2, !fpmath !10 + %sub = tail call float @llvm.experimental.constrained.fsub.f32(float %div, float %div, metadata !"round.upward", metadata !"fpexcept.strict") #2 + %mul = tail call float @llvm.experimental.constrained.fmul.f32(float %sub, float %sub, metadata !"round.downward", metadata !"fpexcept.strict") #2 + %0 = tail call float @llvm.experimental.constrained.fmuladd.f32(float %mul, float %mul, float %mul, metadata !"round.tonearestaway", metadata !"fpexcept.strict") #2 + %1 = tail call float @llvm.experimental.constrained.fma.f32(float %0, float %0, float %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #2 + %2 = tail call float @llvm.experimental.constrained.frem.f32(float %1, float %1, metadata !"round.dynamic", metadata !"fpexcept.strict") #2 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float 
@llvm.experimental.constrained.fmuladd.f32(float, float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) #1 + +attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { inaccessiblememonly nounwind willreturn } +attributes #2 = { strictfp } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!2, !2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 0} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 12.0.0 (https://github.com/c199914007/llvm.git f0c85a8adeb49638c01eee1451aa9b35462cbfd5)"} +!5 = !{i32 0, i32 0, i32 0} +!6 = !{!"none", !"none", !"none"} +!7 = !{!"float", !"int", !"uint"} +!8 = !{!"", !"", !""} +!9 = !{i32 -1, i32 -1, i32 -1} +!10 = !{float 2.500000e+00} diff --git a/llvm-spirv/test/constrained_cmp_intr.ll b/llvm-spirv/test/constrained_cmp_intr.ll new file mode 100644 index 0000000000000..bb34d32ff41d8 --- /dev/null +++ b/llvm-spirv/test/constrained_cmp_intr.ll @@ -0,0 +1,69 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + +;CHECK: FOrdEqual +;CHECK: FOrdGreaterThan +;CHECK: FOrdGreaterThanEqual +;CHECK: FOrdLessThan 
+;CHECK: FOrdLessThanEqual +;CHECK: FOrdNotEqual +;CHECK: Ordered +;CHECK: FUnordEqual +;CHECK: FUnordGreaterThan +;CHECK: FUnordGreaterThanEqual +;CHECK: FUnordLessThan +;CHECK: FUnordLessThanEqual +;CHECK: FUnordNotEqual +;CHECK: Unordered + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-linux-sycldevice" + +; Function Attrs: norecurse nounwind strictfp +define dso_local spir_kernel void @test(float %a) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_buffer_location !9 { +entry: + %cmp = tail call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %a, metadata !"oeq", metadata !"fpexcept.strict") #2 + %cmp1 = tail call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %a, metadata !"ogt", metadata !"fpexcept.strict") #2 + %cmp2 = tail call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %a, metadata !"oge", metadata !"fpexcept.strict") #2 + %cmp3 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"olt", metadata !"fpexcept.strict") #2 + %cmp4 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ole", metadata !"fpexcept.strict") #2 + %cmp5 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"one", metadata !"fpexcept.strict") #2 + %cmp6 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ord", metadata !"fpexcept.strict") #2 + %cmp7 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ueq", metadata !"fpexcept.strict") #2 + %cmp8 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ugt", metadata !"fpexcept.strict") #2 + %cmp9 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"uge", metadata 
!"fpexcept.strict") #2 + %cmp10 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ult", metadata !"fpexcept.strict") #2 + %cmp11 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"ule", metadata !"fpexcept.strict") #2 + %cmp12 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"une", metadata !"fpexcept.strict") #2 + %cmp13 = tail call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %a, metadata !"uno", metadata !"fpexcept.strict") #2 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) #1 + +attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { inaccessiblememonly nounwind willreturn } +attributes #2 = { strictfp } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!2, !2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 0} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 12.0.0 (https://github.com/c199914007/llvm.git f0c85a8adeb49638c01eee1451aa9b35462cbfd5)"} +!5 = !{i32 0} +!6 = !{!"none"} +!7 = !{!"float"} +!8 = !{!""} +!9 = !{i32 -1} diff --git a/llvm-spirv/test/constrained_convert_intr.ll b/llvm-spirv/test/constrained_convert_intr.ll new 
file mode 100644 index 0000000000000..7547bae2df6a9 --- /dev/null +++ b/llvm-spirv/test/constrained_convert_intr.ll @@ -0,0 +1,81 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s +; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: spirv-val %t.spv + +; CHECK: Name [[sf:[0-9]+]] "conv" +; CHECK: Name [[uf:[0-9]+]] "conv1" +; CHECK: Name [[fs:[0-9]+]] "conv2" +; CHECK: Name [[fu:[0-9]+]] "conv3" +; CHECK: Name [[fe:[0-9]+]] "conv4" +; CHECK: Name [[ft:[0-9]+]] "conv5" + +; CHECK: Decorate [[sf]] FPRoundingMode 0 +; CHECK: Decorate [[uf]] FPRoundingMode 1 + +; CHECK-NOT: Decorate [[fs]] FPRoundingMode +; CHECK-NOT: Decorate [[fu]] FPRoundingMode +; CHECK-NOT: Decorate [[fe]] FPRoundingMode + +; CHECK: Decorate [[ft]] FPRoundingMode 2 + +;CHECK: ConvertSToF {{[0-9]+}} [[sf]] +;CHECK: ConvertUToF {{[0-9]+}} [[uf]] +;CHECK: ConvertFToS {{[0-9]+}} [[fs]] +;CHECK: ConvertFToU {{[0-9]+}} [[fu]] +;CHECK: FConvert {{[0-9]+}} [[fe]] +;CHECK: FConvert {{[0-9]+}} [[ft]] + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-linux-sycldevice" + +; Function Attrs: norecurse nounwind strictfp +define dso_local spir_kernel void @test(float %a, i32 %in, i32 %ui) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_buffer_location !9 { +entry: + %conv = tail call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %in, metadata !"round.tonearest", metadata !"fpexcept.ignore") #2 + %conv1 = tail call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %ui, metadata !"round.towardzero", metadata !"fpexcept.ignore") #2 + %conv2 = tail call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %conv1, metadata !"fpexcept.ignore") #2 + %conv3 = tail call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %conv1, metadata !"fpexcept.ignore") #2 + 
%conv4 = tail call double @llvm.experimental.constrained.fpext.f64.f32(float %conv1, metadata !"fpexcept.ignore") #2 + %conv5 = tail call float @llvm.experimental.constrained.fptrunc.f32.f64(double %conv4, metadata !"round.upward", metadata !"fpexcept.ignore") #2 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1 + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) #1 + +attributes #0 = { norecurse nounwind strictfp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test2.cl" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { inaccessiblememonly nounwind willreturn } +attributes #2 = { strictfp } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.spir.version = !{!2, !2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 0} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang 
version 12.0.0 (https://github.com/c199914007/llvm.git f0c85a8adeb49638c01eee1451aa9b35462cbfd5)"} +!5 = !{i32 0, i32 0, i32 0} +!6 = !{!"none", !"none", !"none"} +!7 = !{!"float", !"int", !"uint"} +!8 = !{!"", !"", !""} +!9 = !{i32 -1, i32 -1, i32 -1} From 15dd186e74946b5a1a8107cb9509d9ca319f6485 Mon Sep 17 00:00:00 2001 From: Viktoria Maximova Date: Wed, 9 Sep 2020 11:13:17 +0300 Subject: [PATCH 398/465] Do not translate arbitrary precision operations without corresponding extensions (#714) This patch addresses [comment][1] If the appropriate extension for arbitrary precision instruction is not enabled, invalid IR will be produced. Report an error in this case. [1]: https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/653#discussion_r463629986 --- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 31 +++++++---- ...arbitrary-precision-fixed-point-numbers.ll | 20 ++------ ...lity-arbitrary-precision-floating-point.ll | 51 ++----------------- 3 files changed, 26 insertions(+), 76 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 31917c6141ceb..152d93158f91d 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -2964,17 +2964,26 @@ SPIRVInstruction *LLVMToSPIRV::transBuiltinToInst(StringRef DemangledName, !BM->isAllowedToUseExtension(ExtensionID::SPV_INTEL_blocking_pipes)) return nullptr; - if (OpFixedSqrtINTEL <= OC && OC <= OpFixedExpINTEL && - !BM->isAllowedToUseExtension( - ExtensionID::SPV_INTEL_arbitrary_precision_fixed_point)) - return nullptr; - - if (((OpArbitraryFloatSinCosPiINTEL <= OC && - OC <= OpArbitraryFloatCastToIntINTEL) || - (OpArbitraryFloatAddINTEL <= OC && OC <= OpArbitraryFloatPowNINTEL)) && - !BM->isAllowedToUseExtension( - ExtensionID::SPV_INTEL_arbitrary_precision_floating_point)) - return nullptr; + if (OpFixedSqrtINTEL <= OC && OC <= OpFixedExpINTEL) + BM->getErrorLog().checkError( + BM->isAllowedToUseExtension( + 
ExtensionID::SPV_INTEL_arbitrary_precision_fixed_point), + SPIRVEC_InvalidInstruction, + CI->getCalledOperand()->getName().str() + + "\nFixed point instructions can't be translated correctly without " + "enabled SPV_INTEL_arbitrary_precision_fixed_point extension!\n"); + + if ((OpArbitraryFloatSinCosPiINTEL <= OC && + OC <= OpArbitraryFloatCastToIntINTEL) || + (OpArbitraryFloatAddINTEL <= OC && OC <= OpArbitraryFloatPowNINTEL)) + BM->getErrorLog().checkError( + BM->isAllowedToUseExtension( + ExtensionID::SPV_INTEL_arbitrary_precision_floating_point), + SPIRVEC_InvalidInstruction, + CI->getCalledOperand()->getName().str() + + "\nFloating point instructions can't be translated correctly " + "without enabled SPV_INTEL_arbitrary_precision_floating_point " + "extension!\n"); auto Inst = transBuiltinToInstWithoutDecoration(OC, CI, BB); addDecorations(Inst, Dec); diff --git a/llvm-spirv/test/transcoding/capability-arbitrary-precision-fixed-point-numbers.ll b/llvm-spirv/test/transcoding/capability-arbitrary-precision-fixed-point-numbers.ll index c13b30d8083e3..6611ae2adea92 100644 --- a/llvm-spirv/test/transcoding/capability-arbitrary-precision-fixed-point-numbers.ll +++ b/llvm-spirv/test/transcoding/capability-arbitrary-precision-fixed-point-numbers.ll @@ -95,7 +95,9 @@ ; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers,+SPV_INTEL_arbitrary_precision_fixed_point -o %t.spv ; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV -; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV-NEGATIVE +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -spirv-text -o - 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; CHECK-ERROR: InvalidInstruction: Can't translate llvm instruction: +; CHECK-ERROR: Fixed point instructions can't be translated correctly without enabled SPV_INTEL_arbitrary_precision_fixed_point extension! 
; RUN: llvm-spirv -r %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM @@ -106,9 +108,6 @@ ; CHECK-SPIRV: 12 Extension "SPV_INTEL_arbitrary_precision_fixed_point" ; CHECK-SPIRV: 11 Extension "SPV_INTEL_arbitrary_precision_integers" -; CHECK-SPIRV-NEGATIVE-NOT: 2 Capability ArbitraryPrecisionFixedPointINTEL -; CHECK-SPIRV-NEGATIVE-NOT: 12 Extension "SPV_INTEL_arbitrary_precision_fixed_point" - ; CHECK-SPIRV: 4 TypeInt [[Ty_8:[0-9]+]] 8 0 ; CHECK-SPIRV: 4 TypeInt [[Ty_13:[0-9]+]] 13 0 ; CHECK-SPIRV: 4 TypeInt [[Ty_5:[0-9]+]] 5 0 @@ -128,53 +127,40 @@ ; CHECK-SPIRV: 6 Load [[Ty_13]] [[Sqrt_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSqrtINTEL [[Ty_5]] [[#]] [[Sqrt_InId]] 0 2 2 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSqrtINTEL ; CHECK-SPIRV: 6 Load [[Ty_5]] [[Sqrt_InId_B:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSqrtINTEL [[Ty_13]] [[#]] [[Sqrt_InId_B]] 0 2 2 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSqrtINTEL ; CHECK-SPIRV: 6 Load [[Ty_5]] [[Sqrt_InId_C:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSqrtINTEL [[Ty_13]] [[#]] [[Sqrt_InId_C]] 0 2 2 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSqrtINTEL ; CHECK-SPIRV: 6 Load [[Ty_3]] [[Recip_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedRecipINTEL [[Ty_8]] [[#]] [[Recip_InId]] 1 4 4 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedRecipINTEL ; CHECK-SPIRV: 6 Load [[Ty_11]] [[Rsqrt_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedRsqrtINTEL [[Ty_10]] [[#]] [[Rsqrt_InId]] 0 8 6 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedRsqrtINTEL ; CHECK-SPIRV: 6 Load [[Ty_17]] [[Sin_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSinINTEL [[Ty_11]] [[#]] [[Sin_InId]] 1 7 5 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSinINTEL ; CHECK-SPIRV: 6 Load [[Ty_35]] [[Cos_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedCosINTEL [[Ty_28]] [[#]] [[Cos_InId]] 0 9 3 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedCosINTEL ; CHECK-SPIRV: 6 Load [[Ty_31]] [[SinCos_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSinCosINTEL [[Ty_40]] [[#]] [[SinCos_InId]] 1 10 12 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 
FixedSinCosINTEL ; CHECK-SPIRV: 6 Load [[Ty_60]] [[SinPi_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSinPiINTEL [[Ty_5]] [[#]] [[SinPi_InId]] 0 2 2 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSinPiINTEL ; CHECK-SPIRV: 6 Load [[Ty_28]] [[CosPi_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedCosPiINTEL [[Ty_16]] [[#]] [[CosPi_InId]] 0 8 5 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedCosPiINTEL ; CHECK-SPIRV: 6 Load [[Ty_13]] [[SinCosPi_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedSinCosPiINTEL [[Ty_10]] [[#]] [[SinCosPi_InId]] 0 2 2 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedSinCosPiINTEL ; CHECK-SPIRV: 6 Load [[Ty_64]] [[Log_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedLogINTEL [[Ty_44]] [[#]] [[Log_InId]] 1 24 22 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedLogINTEL ; CHECK-SPIRV: 6 Load [[Ty_44]] [[Exp_InId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 FixedExpINTEL [[Ty_34]] [[#]] [[Exp_InId]] 0 20 20 0 0 -; CHECK-SPIRV-NEGATIVE-NOT: 9 FixedExpINTEL ; CHECK-LLVM: call i5 @intel_arbitrary_fixed_sqrt.i5.i13(i13 %[[#]], i1 false, i32 2, i32 2, i32 0, i32 0) ; CHECK-LLVM: call i13 @intel_arbitrary_fixed_sqrt.i13.i5(i5 %[[#]], i1 false, i32 2, i32 2, i32 0, i32 0) diff --git a/llvm-spirv/test/transcoding/capability-arbitrary-precision-floating-point.ll b/llvm-spirv/test/transcoding/capability-arbitrary-precision-floating-point.ll index 0e24d4f8186f1..b6a295c4839ce 100644 --- a/llvm-spirv/test/transcoding/capability-arbitrary-precision-floating-point.ll +++ b/llvm-spirv/test/transcoding/capability-arbitrary-precision-floating-point.ll @@ -403,7 +403,9 @@ ; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers,+SPV_INTEL_arbitrary_precision_floating_point -o %t.spv ; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV -; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV-NEGATIVE +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -spirv-text -o - 2>&1 | FileCheck 
%s --check-prefix=CHECK-ERROR +; CHECK-ERROR: InvalidInstruction: Can't translate llvm instruction: +; CHECK-ERROR: Floating point instructions can't be translated correctly without enabled SPV_INTEL_arbitrary_precision_floating_point extension! ; RUN: llvm-spirv -r %t.spv -o %t.r.bc ; RUN: llvm-dis < %t.r.bc | FileCheck %s --check-prefix=CHECK-LLVM @@ -414,9 +416,6 @@ ; CHECK-SPIRV: 13 Extension "SPV_INTEL_arbitrary_precision_floating_point" ; CHECK-SPIRV: 11 Extension "SPV_INTEL_arbitrary_precision_integers" -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] Capability ArbitraryPrecisionFloatingPointINTEL -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] Extension "SPV_INTEL_arbitrary_precision_floating_point" - ; CHECK-SPIRV: 4 TypeInt [[Ty_8:[0-9]+]] 8 0 ; CHECK-SPIRV: 4 TypeInt [[Ty_40:[0-9]+]] 40 0 ; CHECK-SPIRV: 4 TypeInt [[Ty_43:[0-9]+]] 43 0 @@ -549,7 +548,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_castILi11ELi28ELi9ELi %6 = call spir_func i40 @_Z31__spirv_ArbitraryFloatCastINTELILi40ELi40EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i40 %5, i32 28, i32 30, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_40]] [[Cast_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatCastINTEL [[Ty_40]] [[#]] [[Cast_AId]] 28 30 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCastINTEL ; CHECK-LLVM: call i40 @intel_arbitrary_float_cast.i40.i40(i40 %[[#]], i32 28, i32 30, i32 0, i32 2, i32 1) store i40 %6, i40* %2, align 8, !tbaa !9 %7 = bitcast i40* %2 to i8* @@ -571,7 +569,6 @@ define linkonce_odr dso_local spir_func void @_Z22ap_float_cast_from_intILi43ELi %6 = call spir_func signext i25 @_Z38__spirv_ArbitraryFloatCastFromIntINTELILi43ELi25EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiii(i43 %5, i32 16, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_43]] [[CastFromInt_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 8 ArbitraryFloatCastFromIntINTEL [[Ty_25]] [[#]] [[CastFromInt_AId]] 16 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCastFromIntINTEL ; CHECK-LLVM: call i25 
@intel_arbitrary_float_cast_from_int.i25.i43(i43 %[[#]], i32 16, i32 0, i32 2, i32 1) store i25 %6, i25* %2, align 4, !tbaa !13 %7 = bitcast i25* %2 to i8* @@ -593,7 +590,6 @@ define linkonce_odr dso_local spir_func void @_Z20ap_float_cast_to_intILi7ELi15E %6 = call spir_func signext i30 @_Z36__spirv_ArbitraryFloatCastToIntINTELILi23ELi30EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiii(i23 signext %5, i32 15, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_23]] [[CastToInt_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 8 ArbitraryFloatCastToIntINTEL [[Ty_30]] [[#]] [[CastToInt_AId]] 15 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCastToIntINTEL ; CHECK-LLVM: call i30 @intel_arbitrary_float_cast_to_int.i30.i23(i23 %[[#]], i32 15, i32 0, i32 2, i32 1) store i30 %6, i30* %2, align 4, !tbaa !17 %7 = bitcast i30* %2 to i8* @@ -627,7 +623,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_addILi5ELi7ELi6ELi8EL ; CHECK-SPIRV: 6 Load [[Ty_13]] [[Add1_A1Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_15]] [[Add1_B1Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatAddINTEL [[Ty_14]] [[#]] [[Add1_A1Id]] 7 [[Add1_B1Id]] 8 9 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatAddINTEL ; CHECK-LLVM: call i14 @intel_arbitrary_float_add.i14.i13.i15(i13 %[[#]], i32 7, i15 %[[#]], i32 8, i32 9, i32 0, i32 2, i32 1) store i14 %14, i14* %5, align 2, !tbaa !23 %15 = bitcast i14* %6 to i8* @@ -638,7 +633,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_addILi5ELi7ELi6ELi8EL ; CHECK-SPIRV: 6 Load [[Ty_13]] [[Add1_A2Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_15]] [[Add1_B2Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatAddINTEL [[Ty_14]] [[#]] [[Add1_A2Id]] 7 [[Add1_B2Id]] 8 9 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatAddINTEL ; CHECK-LLVM: call i14 @intel_arbitrary_float_add.i14.i13.i15(i13 %[[#]], i32 7, i15 %[[#]], i32 8, i32 9, i32 0, i32 2, i32 1) store i14 %18, i14* %6, align 2, !tbaa !23 %19 = bitcast i14* %6 to i8* @@ -680,7 +674,6 @@ define 
linkonce_odr dso_local spir_func void @_Z12ap_float_addILi6ELi8ELi4ELi9EL ; CHECK-SPIRV: 6 Load [[Ty_15]] [[Add2_A1Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_14]] [[Add2_B1Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatAddINTEL [[Ty_13]] [[#]] [[Add2_A1Id]] 8 [[Add2_B1Id]] 9 7 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatAddINTEL ; CHECK-LLVM: call i13 @intel_arbitrary_float_add.i13.i15.i14(i15 %[[#]], i32 8, i14 %[[#]], i32 9, i32 7, i32 0, i32 2, i32 1) store i13 %14, i13* %5, align 2, !tbaa !19 %15 = bitcast i13* %6 to i8* @@ -691,7 +684,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_addILi6ELi8ELi4ELi9EL ; CHECK-SPIRV: 6 Load [[Ty_15]] [[Add2_A2Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_14]] [[Add2_B2Id:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatAddINTEL [[Ty_13]] [[#]] [[Add2_A2Id]] 8 [[Add2_B2Id]] 9 7 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatAddINTEL ; CHECK-LLVM: call i13 @intel_arbitrary_float_add.i13.i15.i14(i15 %[[#]], i32 8, i14 %[[#]], i32 9, i32 7, i32 0, i32 2, i32 1) store i13 %18, i13* %6, align 2, !tbaa !19 %19 = bitcast i13* %6 to i8* @@ -726,7 +718,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_subILi4ELi4ELi5ELi5EL ; CHECK-SPIRV: 6 Load [[Ty_9]] [[Sub_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_11]] [[Sub_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatSubINTEL [[Ty_13]] [[#]] [[Sub_AId]] 4 [[Sub_BId]] 5 6 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSubINTEL ; CHECK-LLVM: call i13 @intel_arbitrary_float_sub.i13.i9.i11(i9 %[[#]], i32 4, i11 %[[#]], i32 5, i32 6, i32 0, i32 2, i32 1) store i13 %9, i13* %3, align 2, !tbaa !19 %10 = bitcast i13* %3 to i8* @@ -755,7 +746,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_mulILi16ELi34ELi16ELi ; CHECK-SPIRV: 6 Load [[Ty_51]] [[Mul_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_51]] [[Mul_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatMulINTEL [[Ty_51]] [[#]] [[Mul_AId]] 34 [[Mul_BId]] 34 34 0 2 1 -; 
CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatMulINTEL ; CHECK-LLVM: call i51 @intel_arbitrary_float_mul.i51.i51.i51(i51 %[[#]], i32 34, i51 %[[#]], i32 34, i32 34, i32 0, i32 2, i32 1) store i51 %9, i51* %3, align 8, !tbaa !29 %10 = bitcast i51* %3 to i8* @@ -784,7 +774,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_divILi4ELi11ELi4ELi11 ; CHECK-SPIRV: 6 Load [[Ty_16]] [[Div_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_16]] [[Div_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatDivINTEL [[Ty_18]] [[#]] [[Div_AId]] 11 [[Div_BId]] 11 12 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatDivINTEL ; CHECK-LLVM: call i18 @intel_arbitrary_float_div.i18.i16.i16(i16 %[[#]], i32 11, i16 %[[#]], i32 11, i32 12, i32 0, i32 2, i32 1) store i18 %9, i18* %3, align 4, !tbaa !33 %10 = bitcast i18* %3 to i8* @@ -812,7 +801,6 @@ define linkonce_odr dso_local spir_func void @_Z11ap_float_gtILi20ELi42ELi21ELi4 ; CHECK-SPIRV: 6 Load [[Ty_63]] [[GT_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_63]] [[GT_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 7 ArbitraryFloatGTINTEL [[Ty_Bool]] [[#]] [[GT_AId]] 42 [[GT_BId]] 41 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatGTINTEL ; CHECK-LLVM: call i1 @intel_arbitrary_float_gt.i1.i63.i63(i63 %[[#]], i32 42, i63 %[[#]], i32 41) %9 = zext i1 %8 to i8 store i8 %9, i8* %3, align 1, !tbaa !37 @@ -840,7 +828,6 @@ define linkonce_odr dso_local spir_func void @_Z11ap_float_geILi19ELi27ELi19ELi2 ; CHECK-SPIRV: 6 Load [[Ty_47]] [[GE_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_47]] [[GE_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 7 ArbitraryFloatGEINTEL [[Ty_Bool]] [[#]] [[GE_AId]] 27 [[GE_BId]] 27 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatGEINTEL ; CHECK-LLVM: call i1 @intel_arbitrary_float_ge.i1.i47.i47(i47 %[[#]], i32 27, i47 %[[#]], i32 27) %9 = zext i1 %8 to i8 store i8 %9, i8* %3, align 1, !tbaa !37 @@ -868,7 +855,6 @@ define linkonce_odr dso_local spir_func void @_Z11ap_float_ltILi2ELi2ELi3ELi3EEv ; CHECK-SPIRV: 6 Load [[Ty_5]] 
[[LT_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_7]] [[LT_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 7 ArbitraryFloatLTINTEL [[Ty_Bool]] [[#]] [[LT_AId]] 2 [[LT_BId]] 3 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLTINTEL ; CHECK-LLVM: call i1 @intel_arbitrary_float_lt.i1.i5.i7(i5 %[[#]], i32 2, i7 %[[#]], i32 3) %9 = zext i1 %8 to i8 store i8 %9, i8* %3, align 1, !tbaa !37 @@ -896,7 +882,6 @@ define linkonce_odr dso_local spir_func void @_Z11ap_float_leILi27ELi27ELi26ELi2 ; CHECK-SPIRV: 6 Load [[Ty_55]] [[LE_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_55]] [[LE_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 7 ArbitraryFloatLEINTEL [[Ty_Bool]] [[#]] [[LE_AId]] 27 [[LE_BId]] 28 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLEINTEL ; CHECK-LLVM: call i1 @intel_arbitrary_float_le.i1.i55.i55(i55 %[[#]], i32 27, i55 %[[#]], i32 28) %9 = zext i1 %8 to i8 store i8 %9, i8* %3, align 1, !tbaa !37 @@ -924,7 +909,6 @@ define linkonce_odr dso_local spir_func void @_Z11ap_float_eqILi7ELi12ELi7ELi7EE ; CHECK-SPIRV: 6 Load [[Ty_20]] [[EQ_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_15]] [[EQ_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 7 ArbitraryFloatEQINTEL [[Ty_Bool]] [[#]] [[EQ_AId]] 12 [[EQ_BId]] 7 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatEQINTEL ; CHECK-LLVM: call i1 @intel_arbitrary_float_eq.i1.i20.i15(i20 %[[#]], i32 12, i15 %[[#]], i32 7) %9 = zext i1 %8 to i8 store i8 %9, i8* %3, align 1, !tbaa !37 @@ -948,7 +932,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_recipILi9ELi29ELi9ELi %6 = call spir_func i39 @_Z32__spirv_ArbitraryFloatRecipINTELILi39ELi39EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i39 %5, i32 29, i32 29, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_39]] [[Recip_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatRecipINTEL [[Ty_39]] [[#]] [[Recip_AId]] 29 29 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatRecipINTEL ; CHECK-LLVM: call i39 @intel_arbitrary_float_recip.i39.i39(i39 %[[#]], i32 29, i32 29, i32 0, i32 2, i32 1) store i39 %6, i39* %2, align 
8, !tbaa !49 %7 = bitcast i39* %2 to i8* @@ -970,7 +953,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_rsqrtILi12ELi19ELi13E %6 = call spir_func i34 @_Z32__spirv_ArbitraryFloatRSqrtINTELILi32ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i32 %5, i32 19, i32 20, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_32]] [[Rsqrt_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatRSqrtINTEL [[Ty_34]] [[#]] [[Rsqrt_AId]] 19 20 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatRSqrtINTEL ; CHECK-LLVM: call i34 @intel_arbitrary_float_rsqrt.i34.i32(i32 %[[#]], i32 19, i32 20, i32 0, i32 2, i32 1) store i34 %6, i34* %2, align 8, !tbaa !53 %7 = bitcast i34* %2 to i8* @@ -992,7 +974,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_cbrtILi0ELi1ELi0ELi1E %6 = call spir_func signext i2 @_Z31__spirv_ArbitraryFloatCbrtINTELILi2ELi2EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i2 signext %5, i32 1, i32 1, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_2]] [[Cbrt_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatCbrtINTEL [[Ty_2]] [[#]] [[Cbrt_AId]] 1 1 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCbrtINTEL ; CHECK-LLVM: call i2 @intel_arbitrary_float_cbrt.i2.i2(i2 %[[#]], i32 1, i32 1, i32 0, i32 2, i32 1) store i2 %6, i2* %2, align 1, !tbaa !55 %7 = bitcast i2* %2 to i8* @@ -1019,7 +1000,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_hypotILi20ELi20ELi21E ; CHECK-SPIRV: 6 Load [[Ty_41]] [[Hypot_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_43]] [[Hypot_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatHypotINTEL [[Ty_42]] [[#]] [[Hypot_AId]] 20 [[Hypot_BId]] 21 22 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatHypotINTEL ; CHECK-LLVM: call i42 @intel_arbitrary_float_hypot.i42.i41.i43(i41 %[[#]], i32 20, i43 %[[#]], i32 21, i32 22, i32 0, i32 2, i32 1) store i42 %9, i42* %3, align 8, !tbaa !59 %10 = bitcast i42* %3 to i8* @@ -1043,7 +1023,6 @@ define linkonce_odr dso_local spir_func void 
@_Z13ap_float_sqrtILi7ELi7ELi8ELi8E %6 = call spir_func signext i17 @_Z31__spirv_ArbitraryFloatSqrtINTELILi15ELi17EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i15 signext %5, i32 7, i32 8, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_15]] [[Sqrt_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSqrtINTEL [[Ty_17]] [[#]] [[Sqrt_AId]] 7 8 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSqrtINTEL ; CHECK-LLVM: call i17 @intel_arbitrary_float_sqrt.i17.i15(i15 %[[#]], i32 7, i32 8, i32 0, i32 2, i32 1) store i17 %6, i17* %2, align 4, !tbaa !61 %7 = bitcast i17* %2 to i8* @@ -1065,7 +1044,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_logILi30ELi19ELi19ELi %6 = call spir_func i50 @_Z30__spirv_ArbitraryFloatLogINTELILi50ELi50EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i50 %5, i32 19, i32 30, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_50]] [[Log_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatLogINTEL [[Ty_50]] [[#]] [[Log_AId]] 19 30 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLogINTEL ; CHECK-LLVM: call i50 @intel_arbitrary_float_log.i50.i50(i50 %[[#]], i32 19, i32 30, i32 0, i32 2, i32 1) store i50 %6, i50* %2, align 8, !tbaa !63 %7 = bitcast i50* %2 to i8* @@ -1087,7 +1065,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_log2ILi17ELi20ELi18EL %6 = call spir_func i38 @_Z31__spirv_ArbitraryFloatLog2INTELILi38ELi38EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i38 %5, i32 20, i32 19, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_38]] [[Log2_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatLog2INTEL [[Ty_38]] [[#]] [[Log2_AId]] 20 19 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLog2INTEL ; CHECK-LLVM: call i38 @intel_arbitrary_float_log2.i38.i38(i38 %[[#]], i32 20, i32 19, i32 0, i32 2, i32 1) store i38 %6, i38* %2, align 8, !tbaa !65 %7 = bitcast i38* %2 to i8* @@ -1108,7 +1085,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_log10ILi4ELi3ELi4ELi5 %5 = call spir_func signext i10 
@_Z32__spirv_ArbitraryFloatLog10INTELILi8ELi10EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i8 signext %4, i32 3, i32 5, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_8]] [[Log10_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatLog10INTEL [[Ty_10]] [[#]] [[Log10_AId]] 3 5 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLog10INTEL ; CHECK-LLVM: call i10 @intel_arbitrary_float_log10.i10.i8(i8 %[[#]], i32 3, i32 5, i32 0, i32 2, i32 1) store i10 %5, i10* %2, align 2, !tbaa !69 %6 = bitcast i10* %2 to i8* @@ -1129,7 +1105,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_log1pILi17ELi30ELi18E %6 = call spir_func i49 @_Z32__spirv_ArbitraryFloatLog1pINTELILi48ELi49EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i48 %5, i32 30, i32 30, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_48]] [[Log1p_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatLog1pINTEL [[Ty_49]] [[#]] [[Log1p_AId]] 30 30 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatLog1pINTEL ; CHECK-LLVM: call i49 @intel_arbitrary_float_log1p.i49.i48(i48 %[[#]], i32 30, i32 30, i32 0, i32 2, i32 1) store i49 %6, i49* %2, align 8, !tbaa !73 %7 = bitcast i49* %2 to i8* @@ -1151,7 +1126,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_expILi16ELi25ELi16ELi %6 = call spir_func i42 @_Z30__spirv_ArbitraryFloatExpINTELILi42ELi42EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i42 %5, i32 25, i32 25, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_42]] [[Exp_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatExpINTEL [[Ty_42]] [[#]] [[Exp_AId]] 25 25 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatExpINTEL ; CHECK-LLVM: call i42 @intel_arbitrary_float_exp.i42.i42(i42 %[[#]], i32 25, i32 25, i32 0, i32 2, i32 1) store i42 %6, i42* %2, align 8, !tbaa !59 %7 = bitcast i42* %2 to i8* @@ -1173,7 +1147,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_exp2ILi1ELi1ELi2ELi2E %6 = call spir_func signext i5 
@_Z31__spirv_ArbitraryFloatExp2INTELILi3ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i3 signext %5, i32 1, i32 2, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_3]] [[Exp2_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatExp2INTEL [[Ty_5]] [[#]] [[Exp2_AId]] 1 2 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatExp2INTEL ; CHECK-LLVM: call i5 @intel_arbitrary_float_exp2.i5.i3(i3 %[[#]], i32 1, i32 2, i32 0, i32 2, i32 1) store i5 %6, i5* %2, align 1, !tbaa !41 %7 = bitcast i5* %2 to i8* @@ -1195,7 +1168,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_exp10ILi8ELi16ELi8ELi %6 = call spir_func signext i25 @_Z32__spirv_ArbitraryFloatExp10INTELILi25ELi25EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i25 signext %5, i32 16, i32 16, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_25]] [[Exp10_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatExp10INTEL [[Ty_25]] [[#]] [[Exp10_AId]] 16 16 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatExp10INTEL ; CHECK-LLVM: call i25 @intel_arbitrary_float_exp10.i25.i25(i25 %[[#]], i32 16, i32 16, i32 0, i32 2, i32 1) store i25 %6, i25* %2, align 4, !tbaa !13 %7 = bitcast i25* %2 to i8* @@ -1217,7 +1189,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_expm1ILi21ELi42ELi20E %6 = call spir_func i62 @_Z32__spirv_ArbitraryFloatExpm1INTELILi64ELi62EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i64 %5, i32 42, i32 41, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_64]] [[Expm1_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatExpm1INTEL [[Ty_62]] [[#]] [[Expm1_AId]] 42 41 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatExpm1INTEL ; CHECK-LLVM: call i62 @intel_arbitrary_float_expm1.i62.i64(i64 %[[#]], i32 42, i32 41, i32 0, i32 2, i32 1) store i62 %6, i62* %2, align 8, !tbaa !79 %7 = bitcast i62* %2 to i8* @@ -1239,7 +1210,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_sinILi14ELi15ELi16ELi %6 = call spir_func i34 
@_Z30__spirv_ArbitraryFloatSinINTELILi30ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i30 signext %5, i32 15, i32 17, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_30]] [[Sin_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSinINTEL [[Ty_34]] [[#]] [[Sin_AId]] 15 17 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSinINTEL ; CHECK-LLVM: call i34 @intel_arbitrary_float_sin.i34.i30(i30 %[[#]], i32 15, i32 17, i32 0, i32 2, i32 1) store i34 %6, i34* %2, align 8, !tbaa !53 %7 = bitcast i34* %2 to i8* @@ -1261,7 +1231,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_cosILi1ELi2ELi2ELi1EE %6 = call spir_func signext i4 @_Z30__spirv_ArbitraryFloatCosINTELILi4ELi4EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i4 signext %5, i32 2, i32 1, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_4]] [[Cos_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatCosINTEL [[Ty_4]] [[#]] [[Cos_AId]] 2 1 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCosINTEL ; CHECK-LLVM: call i4 @intel_arbitrary_float_cos.i4.i4(i4 %[[#]], i32 2, i32 1, i32 0, i32 2, i32 1) store i4 %6, i4* %2, align 1, !tbaa !81 %7 = bitcast i4* %2 to i8* @@ -1283,7 +1252,6 @@ define linkonce_odr dso_local spir_func void @_Z15ap_float_sincosILi8ELi18ELi10E %6 = call spir_func i62 @_Z33__spirv_ArbitraryFloatSinCosINTELILi27ELi31EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEiiiiii(i27 signext %5, i32 18, i32 20, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_27]] [[SinCos_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSinCosINTEL [[Ty_62]] [[#]] [[SinCos_AId]] 18 20 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSinCosINTEL ; CHECK-LLVM: call i62 @intel_arbitrary_float_sincos.i62.i27(i27 %[[#]], i32 18, i32 20, i32 0, i32 2, i32 1) store i62 %6, i62* %2, align 8, !tbaa !79 %7 = bitcast i62* %2 to i8* @@ -1305,7 +1273,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_sinpiILi3ELi6ELi6ELi6 %6 = call spir_func signext i13 
@_Z32__spirv_ArbitraryFloatSinPiINTELILi10ELi13EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i10 signext %5, i32 6, i32 6, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_10]] [[SinPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSinPiINTEL [[Ty_13]] [[#]] [[SinPi_AId]] 6 6 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSinPiINTEL ; CHECK-LLVM: call i13 @intel_arbitrary_float_sinpi.i13.i10(i10 %[[#]], i32 6, i32 6, i32 0, i32 2, i32 1) store i13 %6, i13* %2, align 2, !tbaa !19 %7 = bitcast i13* %2 to i8* @@ -1327,7 +1294,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_cospiILi18ELi40ELi18E %6 = call spir_func i59 @_Z32__spirv_ArbitraryFloatCosPiINTELILi59ELi59EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i59 %5, i32 40, i32 40, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_59]] [[CosPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatCosPiINTEL [[Ty_59]] [[#]] [[CosPi_AId]] 40 40 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatCosPiINTEL ; CHECK-LLVM: call i59 @intel_arbitrary_float_cospi.i59.i59(i59 %[[#]], i32 40, i32 40, i32 0, i32 2, i32 1) store i59 %6, i59* %2, align 8, !tbaa !85 %7 = bitcast i59* %2 to i8* @@ -1349,7 +1315,6 @@ define linkonce_odr dso_local spir_func void @_Z17ap_float_sincospiILi9ELi20ELi1 %6 = call spir_func i64 @_Z35__spirv_ArbitraryFloatSinCosPiINTELILi30ELi32EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEiiiiii(i30 signext %5, i32 20, i32 20, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_30]] [[SinCosPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatSinCosPiINTEL [[Ty_64]] [[#]] [[SinCosPi_AId]] 20 20 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatSinCosPiINTEL ; CHECK-LLVM: call i64 @intel_arbitrary_float_sincospi.i64.i30(i30 %[[#]], i32 20, i32 20, i32 0, i32 2, i32 1) store i64 %6, i64* %2, align 8, !tbaa !77 %7 = bitcast i64* %2 to i8* @@ -1371,7 +1336,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_asinILi2ELi4ELi2ELi8E %6 = call spir_func signext i11 
@_Z31__spirv_ArbitraryFloatASinINTELILi7ELi11EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i7 signext %5, i32 4, i32 8, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_7]] [[ASin_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatASinINTEL [[Ty_11]] [[#]] [[ASin_AId]] 4 8 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatASinINTEL ; CHECK-LLVM: call i11 @intel_arbitrary_float_asin.i11.i7(i7 %[[#]], i32 4, i32 8, i32 0, i32 2, i32 1) store i11 %6, i11* %2, align 2, !tbaa !27 %7 = bitcast i11* %2 to i8* @@ -1393,7 +1357,6 @@ define linkonce_odr dso_local spir_func void @_Z15ap_float_asinpiILi11ELi23ELi11 %6 = call spir_func i35 @_Z33__spirv_ArbitraryFloatASinPiINTELILi35ELi35EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i35 %5, i32 23, i32 23, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_35]] [[ASinPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatASinPiINTEL [[Ty_35]] [[#]] [[ASinPi_AId]] 23 23 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatASinPiINTEL ; CHECK-LLVM: call i35 @intel_arbitrary_float_asinpi.i35.i35(i35 %[[#]], i32 23, i32 23, i32 0, i32 2, i32 1) store i35 %6, i35* %2, align 8, !tbaa !87 %7 = bitcast i35* %2 to i8* @@ -1415,7 +1378,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_acosILi4ELi9ELi3ELi10 %6 = call spir_func signext i14 @_Z31__spirv_ArbitraryFloatACosINTELILi14ELi14EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i14 signext %5, i32 9, i32 10, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_14]] [[ACos_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatACosINTEL [[Ty_14]] [[#]] [[ACos_AId]] 9 10 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatACosINTEL ; CHECK-LLVM: call i14 @intel_arbitrary_float_acos.i14.i14(i14 %[[#]], i32 9, i32 10, i32 0, i32 2, i32 1) store i14 %6, i14* %2, align 2, !tbaa !23 %7 = bitcast i14* %2 to i8* @@ -1435,7 +1397,6 @@ define linkonce_odr dso_local spir_func void @_Z15ap_float_acospiILi2ELi5ELi3ELi %4 = call spir_func signext i8 
@_Z33__spirv_ArbitraryFloatACosPiINTELILi8ELi8EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i8 signext %3, i32 5, i32 4, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_8]] [[ACosPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatACosPiINTEL [[Ty_8]] [[#]] [[ACosPi_AId]] 5 4 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatACosPiINTEL ; CHECK-LLVM: call i8 @intel_arbitrary_float_acospi.i8.i8(i8 %[[#]], i32 5, i32 4, i32 0, i32 2, i32 1) store i8 %4, i8* %2, align 1, !tbaa !67 call void @llvm.lifetime.end.p0i8(i64 1, i8* %2) #5 @@ -1455,7 +1416,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_atanILi12ELi31ELi12EL %6 = call spir_func i44 @_Z31__spirv_ArbitraryFloatATanINTELILi44ELi44EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i44 %5, i32 31, i32 31, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_44]] [[ATan_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatATanINTEL [[Ty_44]] [[#]] [[ATan_AId]] 31 31 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatATanINTEL ; CHECK-LLVM: call i44 @intel_arbitrary_float_atan.i44.i44(i44 %[[#]], i32 31, i32 31, i32 0, i32 2, i32 1) store i44 %6, i44* %2, align 8, !tbaa !89 %7 = bitcast i44* %2 to i8* @@ -1477,7 +1437,6 @@ define linkonce_odr dso_local spir_func void @_Z15ap_float_atanpiILi1ELi38ELi1EL %6 = call spir_func i34 @_Z33__spirv_ArbitraryFloatATanPiINTELILi40ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEiiiiii(i40 %5, i32 38, i32 32, i32 0, i32 2, i32 1) #5 ; CHECK-SPIRV: 6 Load [[Ty_40]] [[ATanPi_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 9 ArbitraryFloatATanPiINTEL [[Ty_34]] [[#]] [[ATanPi_AId]] 38 32 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatATanPiINTEL ; CHECK-LLVM: call i34 @intel_arbitrary_float_atanpi.i34.i40(i40 %[[#]], i32 38, i32 32, i32 0, i32 2, i32 1) store i34 %6, i34* %2, align 8, !tbaa !53 %7 = bitcast i34* %2 to i8* @@ -1504,7 +1463,6 @@ define linkonce_odr dso_local spir_func void @_Z14ap_float_atan2ILi7ELi16ELi7ELi ; CHECK-SPIRV: 6 Load [[Ty_24]] [[ATan2_AId:[0-9]+]] ; 
CHECK-SPIRV-NEXT: 6 Load [[Ty_25]] [[ATan2_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatATan2INTEL [[Ty_27]] [[#]] [[ATan2_AId]] 16 [[ATan2_BId]] 17 18 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatATan2INTEL ; CHECK-LLVM: call i27 @intel_arbitrary_float_atan2.i27.i24.i25(i24 %[[#]], i32 16, i25 %[[#]], i32 17, i32 18, i32 0, i32 2, i32 1) store i27 %9, i27* %3, align 4, !tbaa !83 %10 = bitcast i27* %3 to i8* @@ -1533,7 +1491,6 @@ define linkonce_odr dso_local spir_func void @_Z12ap_float_powILi8ELi8ELi9ELi9EL ; CHECK-SPIRV: 6 Load [[Ty_17]] [[Pow_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_19]] [[Pow_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatPowINTEL [[Ty_21]] [[#]] [[Pow_AId]] 8 [[Pow_BId]] 9 10 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatPowINTEL ; CHECK-LLVM: call i21 @intel_arbitrary_float_pow.i21.i17.i19(i17 %[[#]], i32 8, i19 %[[#]], i32 9, i32 10, i32 0, i32 2, i32 1) store i21 %9, i21* %3, align 4, !tbaa !95 %10 = bitcast i21* %3 to i8* @@ -1562,7 +1519,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_powrILi18ELi35ELi19EL ; CHECK-SPIRV: 6 Load [[Ty_54]] [[PowR_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_55]] [[PowR_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 11 ArbitraryFloatPowRINTEL [[Ty_56]] [[#]] [[PowR_AId]] 35 [[PowR_BId]] 35 35 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatPowRINTEL ; CHECK-LLVM: call i56 @intel_arbitrary_float_powr.i56.i54.i55(i54 %[[#]], i32 35, i55 %[[#]], i32 35, i32 35, i32 0, i32 2, i32 1) store i56 %9, i56* %3, align 8, !tbaa !99 %10 = bitcast i56* %3 to i8* @@ -1591,7 +1547,6 @@ define linkonce_odr dso_local spir_func void @_Z13ap_float_pownILi4ELi7ELi10ELi5 ; CHECK-SPIRV: 6 Load [[Ty_12]] [[PowN_AId:[0-9]+]] ; CHECK-SPIRV-NEXT: 6 Load [[Ty_10]] [[PowN_BId:[0-9]+]] ; CHECK-SPIRV-NEXT: 10 ArbitraryFloatPowNINTEL [[Ty_15]] [[#]] [[PowN_AId]] 7 [[PowN_BId]] 9 0 2 1 -; CHECK-SPIRV-NEGATIVE-NOT: [[#]] ArbitraryFloatPowNINTEL ; CHECK-LLVM: call i15 
@intel_arbitrary_float_pown.i15.i12.i10(i12 %[[#]], i32 7, i10 %[[#]], i32 9, i32 0, i32 2, i32 1) store i15 %9, i15* %3, align 2, !tbaa !21 %10 = bitcast i15* %3 to i8* From 3addf30c9448d64bcfeca436b6cbe1f5186edfe2 Mon Sep 17 00:00:00 2001 From: fadeeval <60697485+fadeeval@users.noreply.github.com> Date: Wed, 9 Sep 2020 22:51:35 +0300 Subject: [PATCH 399/465] Fix nullptr dereference (#741) Replaced nullptr with an empty string --- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 152d93158f91d..e8f20cca7a53e 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -1571,8 +1571,7 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V, if (Instruction *Inst = dyn_cast(V)) { BM->getErrorLog().checkError(false, SPIRVEC_InvalidInstruction, - toString(Inst) + "\n", nullptr, __FILE__, - __LINE__); + toString(Inst) + "\n", "", __FILE__, __LINE__); } llvm_unreachable("Not implemented"); From ce75da7e62994c6e75deeebef5d4e38487ce5ace Mon Sep 17 00:00:00 2001 From: Mochalova Anastasiya Date: Thu, 10 Sep 2020 18:44:53 +0300 Subject: [PATCH 400/465] Avoid usage of deprecated "VectorType::getNumElements" (#737) Replaced usages of "VectorType::getNumElements" with "FixedVectorType::getNumElements" Signed-off-by: amochalo --- llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp | 11 ++++---- llvm-spirv/lib/SPIRV/OCLUtil.cpp | 2 +- llvm-spirv/lib/SPIRV/SPIRVReader.cpp | 37 ++++++++++++++------------- llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp | 8 +++--- llvm-spirv/lib/SPIRV/SPIRVUtil.cpp | 10 ++++---- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 4 +-- 6 files changed, 37 insertions(+), 35 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp index 1612babb07e87..e4574bebbcb8c 100644 --- a/llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp +++ b/llvm-spirv/lib/SPIRV/OCL20ToSPIRV.cpp @@ 
-1335,23 +1335,24 @@ void OCL20ToSPIRV::visitCallRelational(CallInst *CI, StringRef DemangledName) { if (CI->getOperand(0)->getType()->isVectorTy()) Ret = FixedVectorType::get( Type::getInt1Ty(*Ctx), - cast(CI->getOperand(0)->getType())->getNumElements()); + cast(CI->getOperand(0)->getType()) + ->getNumElements()); return SPIRVName; }, [=](CallInst *NewCI) -> Instruction * { Value *False = nullptr, *True = nullptr; if (NewCI->getType()->isVectorTy()) { Type *IntTy = Type::getInt32Ty(*Ctx); - if (cast(NewCI->getOperand(0)->getType()) + if (cast(NewCI->getOperand(0)->getType()) ->getElementType() ->isDoubleTy()) IntTy = Type::getInt64Ty(*Ctx); - if (cast(NewCI->getOperand(0)->getType()) + if (cast(NewCI->getOperand(0)->getType()) ->getElementType() ->isHalfTy()) IntTy = Type::getInt16Ty(*Ctx); Type *VTy = FixedVectorType::get( - IntTy, cast(NewCI->getType())->getNumElements()); + IntTy, cast(NewCI->getType())->getNumElements()); False = Constant::getNullValue(VTy); True = Constant::getAllOnesValue(VTy); } else { @@ -1618,7 +1619,7 @@ static void processSubgroupBlockReadWriteINTEL(CallInst *CI, OCLBuiltinTransInfo &Info, const Type *DataTy, Module *M) { unsigned VectorNumElements = 1; - if (auto *VecTy = dyn_cast(DataTy)) + if (auto *VecTy = dyn_cast(DataTy)) VectorNumElements = VecTy->getNumElements(); unsigned ElementBitSize = DataTy->getScalarSizeInBits(); Info.Postfix = "_"; diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.cpp b/llvm-spirv/lib/SPIRV/OCLUtil.cpp index cb230ce01f528..749bf53abe835 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.cpp +++ b/llvm-spirv/lib/SPIRV/OCLUtil.cpp @@ -805,7 +805,7 @@ unsigned encodeVecTypeHint(Type *Ty) { llvm_unreachable("invalid integer type"); } } - if (VectorType *VecTy = dyn_cast(Ty)) { + if (FixedVectorType *VecTy = dyn_cast(Ty)) { Type *EleTy = VecTy->getElementType(); unsigned Size = VecTy->getNumElements(); return Size << 16 | encodeVecTypeHint(EleTy); diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp 
b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index c0eab091f3bed..9781d595eadef 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -336,7 +336,7 @@ bool SPIRVToLLVM::transOCLBuiltinFromVariable(GlobalVariable *GV, std::vector Vectors; Loads.push_back(LD); if (HasIndexArg) { - auto *VecTy = cast( + auto *VecTy = cast( LD->getPointerOperandType()->getPointerElementType()); Value *EmptyVec = UndefValue::get(VecTy); Vectors.push_back(EmptyVec); @@ -1937,7 +1937,7 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, IRBuilder<> Builder(BB); auto Scalar = transValue(VTS->getScalar(), F, BB); auto Vector = transValue(VTS->getVector(), F, BB); - auto *VecTy = cast(Vector->getType()); + auto *VecTy = cast(Vector->getType()); unsigned VecSize = VecTy->getNumElements(); auto NewVec = Builder.CreateVectorSplat(VecSize, Scalar, Scalar->getName()); NewVec->takeName(Scalar); @@ -1965,8 +1965,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, unsigned M = Mat->getType()->getArrayNumElements(); - auto *VecTy = cast(Vec->getType()); - VectorType *VTy = FixedVectorType::get(VecTy->getElementType(), M); + auto *VecTy = cast(Vec->getType()); + FixedVectorType *VTy = FixedVectorType::get(VecTy->getElementType(), M); auto ETy = VTy->getElementType(); unsigned N = VecTy->getNumElements(); Value *V = Builder.CreateVectorSplat(M, ConstantFP::get(ETy, 0.0)); @@ -1994,7 +1994,7 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, auto Matrix = transValue(MTS->getMatrix(), F, BB); uint64_t ColNum = Matrix->getType()->getArrayNumElements(); auto ColType = cast(Matrix->getType())->getElementType(); - auto VecSize = cast(ColType)->getNumElements(); + auto VecSize = cast(ColType)->getNumElements(); auto NewVec = Builder.CreateVectorSplat(VecSize, Scalar, Scalar->getName()); NewVec->takeName(Scalar); @@ -2031,8 +2031,8 @@ Value 
*SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, // where sum is defined as vector sum. unsigned M = Mat->getType()->getArrayNumElements(); - VectorType *VTy = - cast(cast(Mat->getType())->getElementType()); + FixedVectorType *VTy = cast( + cast(Mat->getType())->getElementType()); unsigned N = VTy->getNumElements(); auto ETy = VTy->getElementType(); Value *V = Builder.CreateVectorSplat(N, ConstantFP::get(ETy, 0.0)); @@ -2086,10 +2086,10 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, unsigned C1 = M1->getType()->getArrayNumElements(); unsigned C2 = M2->getType()->getArrayNumElements(); - VectorType *V1Ty = - cast(cast(M1->getType())->getElementType()); - VectorType *V2Ty = - cast(cast(M2->getType())->getElementType()); + FixedVectorType *V1Ty = + cast(cast(M1->getType())->getElementType()); + FixedVectorType *V2Ty = + cast(cast(M2->getType())->getElementType()); unsigned R1 = V1Ty->getNumElements(); unsigned R2 = V2Ty->getNumElements(); auto ETy = V1Ty->getElementType(); @@ -2127,8 +2127,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, IRBuilder<> Builder(BB); auto Matrix = transValue(TR->getMatrix(), F, BB); unsigned ColNum = Matrix->getType()->getArrayNumElements(); - VectorType *ColTy = - cast(cast(Matrix->getType())->getElementType()); + FixedVectorType *ColTy = cast( + cast(Matrix->getType())->getElementType()); unsigned RowNum = ColTy->getNumElements(); auto VTy = FixedVectorType::get(ColTy->getElementType(), ColNum); @@ -4268,7 +4268,7 @@ Instruction *SPIRVToLLVM::transOCLAllAny(SPIRVInstruction *I, BasicBlock *BB) { auto OldArg = CI->getOperand(0); auto NewArgTy = FixedVectorType::get( Int32Ty, - cast(OldArg->getType())->getNumElements()); + cast(OldArg->getType())->getNumElements()); auto NewArg = CastInst::CreateSExtOrBitCast(OldArg, NewArgTy, "", CI); Args[0] = NewArg; @@ -4294,16 +4294,17 @@ Instruction *SPIRVToLLVM::transOCLRelational(SPIRVInstruction *I, Type *IntTy 
= Type::getInt32Ty(*Context); RetTy = IntTy; if (CI->getType()->isVectorTy()) { - if (cast(CI->getOperand(0)->getType()) + if (cast(CI->getOperand(0)->getType()) ->getElementType() ->isDoubleTy()) IntTy = Type::getInt64Ty(*Context); - if (cast(CI->getOperand(0)->getType()) + if (cast(CI->getOperand(0)->getType()) ->getElementType() ->isHalfTy()) IntTy = Type::getInt16Ty(*Context); RetTy = FixedVectorType::get( - IntTy, cast(CI->getType())->getNumElements()); + IntTy, + cast(CI->getType())->getNumElements()); } return CI->getCalledFunction()->getName().str(); }, @@ -4312,7 +4313,7 @@ Instruction *SPIRVToLLVM::transOCLRelational(SPIRVInstruction *I, if (NewCI->getType()->isVectorTy()) RetTy = FixedVectorType::get( Type::getInt1Ty(*Context), - cast(NewCI->getType())->getNumElements()); + cast(NewCI->getType())->getNumElements()); return CastInst::CreateTruncOrBitCast(NewCI, RetTy, "", NewCI->getNextNode()); }, diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp index 693faca282be3..7fd560e0dfb59 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp @@ -190,13 +190,13 @@ void SPIRVToOCL::visitCallSPRIVImageQuerySize(CallInst *CI) { GetImageSize, FixedVectorType::get( CI->getType()->getScalarType(), - cast(GetImageSize->getType())->getNumElements()), + cast(GetImageSize->getType())->getNumElements()), false, CI->getName(), CI); } } if (ImgArray || ImgDim == 3) { - auto *VecTy = cast(CI->getType()); + auto *VecTy = cast(CI->getType()); const unsigned ImgQuerySizeRetEls = VecTy->getNumElements(); if (ImgDim == 1) { @@ -224,7 +224,7 @@ void SPIRVToOCL::visitCallSPRIVImageQuerySize(CallInst *CI) { if (ImgArray) { assert((ImgDim == 1 || ImgDim == 2) && "invalid image array type"); // Insert get_image_array_size to the last position of the resulting vector. 
- auto *VecTy = cast(CI->getType()); + auto *VecTy = cast(CI->getType()); Type *SizeTy = Type::getIntNTy(*Ctx, M->getDataLayout().getPointerSizeInBits(0)); Instruction *GetImageArraySize = addCallInst( @@ -482,7 +482,7 @@ void SPIRVToOCL::visitCallSPIRVImageMediaBlockBuiltin(CallInst *CI, Op OC) { else assert(0 && "Unsupported texel type!"); - if (auto *VecTy = dyn_cast(RetType)) { + if (auto *VecTy = dyn_cast(RetType)) { unsigned int NumEl = VecTy->getNumElements(); assert((NumEl == 2 || NumEl == 4 || NumEl == 8 || NumEl == 16) && "Wrong function type!"); diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index 4c3c760f2484b..77f0f7da8b71a 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -141,7 +141,7 @@ std::string mapLLVMTypeToOCLType(const Type *Ty, bool Signed) { } return SignPrefix + Stem; } - if (auto VecTy = dyn_cast(Ty)) { + if (auto VecTy = dyn_cast(Ty)) { Type *EleTy = VecTy->getElementType(); unsigned Size = VecTy->getNumElements(); std::stringstream Ss; @@ -740,7 +740,7 @@ void makeVector(Instruction *InsPos, std::vector &Ops, void expandVector(Instruction *InsPos, std::vector &Ops, size_t VecPos) { auto Vec = Ops[VecPos]; - auto *VT = dyn_cast(Vec->getType()); + auto *VT = dyn_cast(Vec->getType()); if (!VT) return; size_t N = VT->getNumElements(); @@ -1047,7 +1047,7 @@ static SPIR::RefParamType transTypeDesc(Type *Ty, return SPIR::RefParamType(new SPIR::PrimitiveType(SPIR::PRIMITIVE_FLOAT)); if (Ty->isDoubleTy()) return SPIR::RefParamType(new SPIR::PrimitiveType(SPIR::PRIMITIVE_DOUBLE)); - if (auto *VecTy = dyn_cast(Ty)) { + if (auto *VecTy = dyn_cast(Ty)) { return SPIR::RefParamType(new SPIR::VectorType( transTypeDesc(VecTy->getElementType(), Info), VecTy->getNumElements())); } @@ -1161,7 +1161,7 @@ Value *getScalarOrArray(Value *V, unsigned Size, Instruction *Pos) { Constant *getScalarOrVectorConstantInt(Type *T, uint64_t V, bool IsSigned) { if (auto IT = dyn_cast(T)) 
return ConstantInt::get(IT, V); - if (auto VT = dyn_cast(T)) { + if (auto VT = dyn_cast(T)) { std::vector EV( VT->getNumElements(), getScalarOrVectorConstantInt(VT->getElementType(), V, IsSigned)); @@ -1538,7 +1538,7 @@ bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM) { if (II->getArgOperand(0)->getType() != Ty) return false; int NumElems = 1; - if (auto *VecTy = dyn_cast(Ty)) { + if (auto *VecTy = dyn_cast(Ty)) { NumElems = VecTy->getNumElements(); Ty = VecTy->getElementType(); } diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index e8f20cca7a53e..0b38cf8164e69 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -376,7 +376,7 @@ SPIRVType *LLVMToSPIRV::transType(Type *T) { } } - if (auto *VecTy = dyn_cast(T)) + if (auto *VecTy = dyn_cast(T)) return mapType(T, BM->addVectorType(transType(VecTy->getElementType()), VecTy->getNumElements())); @@ -3463,7 +3463,7 @@ LLVMToSPIRV::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI, auto IsVector = ResultTy->isVectorTy(); if (IsVector) BoolTy = FixedVectorType::get( - BoolTy, cast(ResultTy)->getNumElements()); + BoolTy, cast(ResultTy)->getNumElements()); auto BBT = transType(BoolTy); SPIRVInstruction *Res; if (isCmpOpCode(OC)) { From 4f766392e7cc9d56dc1047bca7427cd4a25572d1 Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Sun, 13 Sep 2020 21:57:20 +0300 Subject: [PATCH 401/465] Revert "[SYCL] XFAIL test blcoking pulldown" This reverts commit 073088218b4aa2c45cd7b4430d97c21b18424de6. 
--- llvm-spirv/test/negative/unsup_invoke_instr.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm-spirv/test/negative/unsup_invoke_instr.ll b/llvm-spirv/test/negative/unsup_invoke_instr.ll index f5d2632591277..1bc31610ccec2 100644 --- a/llvm-spirv/test/negative/unsup_invoke_instr.ll +++ b/llvm-spirv/test/negative/unsup_invoke_instr.ll @@ -1,4 +1,3 @@ -; XFAIL: windows ;Translator does not parse some llvm instructions ;and emit errror message in that case. ; RUN: llvm-as %s -o %t.bc From dd7fec83302b9f25226709614518f25c649cd0e6 Mon Sep 17 00:00:00 2001 From: jtmott-intel <59898758+jtmott-intel@users.noreply.github.com> Date: Sun, 13 Sep 2020 22:41:29 -0700 Subject: [PATCH 402/465] [SYCL] Support or diagnose use of namespace std types as kernel type names (#1963) When std::nullptr_t is used as a kernel type, the generated integration header uses 'nullptr_t'. This causes lookup errors. Use 'std::nullptr_t' instead. std::max_align_t is defined (in one implementation) as a typedef of an anonymous struct. This causes errors when attempting to forward declare the type in the integration header. Diagnose such cases earlier. 
Signed-off-by: Premanand M Rao --- .../clang/Basic/DiagnosticSemaKinds.td | 1 + clang/lib/Sema/SemaSYCL.cpp | 14 ++- .../test/CodeGenSYCL/stdtypes_kernel_type.cpp | 63 +++++++++++++ clang/test/SemaSYCL/unnamed-kernel.cpp | 20 ++++- sycl/test/group-algorithm/broadcast.cpp | 15 ++-- sycl/test/group-algorithm/exclusive_scan.cpp | 39 ++++---- sycl/test/group-algorithm/inclusive_scan.cpp | 40 +++++---- sycl/test/group-algorithm/reduce.cpp | 53 +++++------ sycl/test/reduction/reduction_ctor.cpp | 89 ++++++++++--------- .../reduction/reduction_nd_conditional.cpp | 38 +++----- sycl/test/reduction/reduction_nd_ext_type.hpp | 46 +++++----- sycl/test/reduction/reduction_nd_s0_dw.cpp | 60 +++++++------ sycl/test/reduction/reduction_nd_s0_rw.cpp | 63 +++++++------ sycl/test/reduction/reduction_nd_s1_dw.cpp | 64 +++++++------ sycl/test/reduction/reduction_nd_s1_rw.cpp | 61 +++++++------ sycl/test/reduction/reduction_placeholder.cpp | 37 ++++---- sycl/test/reduction/reduction_transparent.cpp | 37 ++++---- sycl/test/reduction/reduction_usm.cpp | 56 +++++++----- sycl/test/sub_group/generic-shuffle.cpp | 86 +++++++++--------- sycl/test/sub_group/reduce.cpp | 10 +-- sycl/test/sub_group/reduce.hpp | 50 +++++++---- sycl/test/sub_group/reduce_fp16.cpp | 2 +- sycl/test/sub_group/reduce_fp64.cpp | 2 +- sycl/test/sub_group/scan.cpp | 10 +-- sycl/test/sub_group/scan.hpp | 77 ++++++++++------ sycl/test/sub_group/scan_fp16.cpp | 2 +- sycl/test/sub_group/scan_fp64.cpp | 2 +- 27 files changed, 616 insertions(+), 421 deletions(-) create mode 100644 clang/test/CodeGenSYCL/stdtypes_kernel_type.cpp diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 87c6147b3f0ac..82654f9e19ef4 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10982,6 +10982,7 @@ def err_sycl_kernel_incorrectly_named : Error< "kernel %select{name is missing" "|needs to have a globally-visible 
name" "|name is invalid. Unscoped enum requires fixed underlying type" + "|name cannot be a type in the \"std\" namespace" "}0">; def err_sycl_kernel_not_function_object : Error<"kernel parameter must be a lambda or function object">; diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 211389e24f73b..fb54d682a1085 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2908,6 +2908,13 @@ void SYCLIntegrationHeader::emitFwdDecl(raw_ostream &O, const Decl *D, } break; } + + if (NS->isStdNamespace()) { + Diag.Report(KernelLocation, diag::err_sycl_kernel_incorrectly_named) + << /* name cannot be a type in the std namespace */ 3; + return; + } + ++NamespaceCnt; const StringRef NSInlinePrefix = NS->isInline() ? "inline " : ""; NSStr.insert( @@ -2990,8 +2997,13 @@ void SYCLIntegrationHeader::emitForwardClassDecls( ; const CXXRecordDecl *RD = T->getAsCXXRecordDecl(); - if (!RD) + if (!RD) { + if (T->isNullPtrType()) + Diag.Report(KernelLocation, diag::err_sycl_kernel_incorrectly_named) + << /* name cannot be a type in the std namespace */ 3; + return; + } // see if this is a template specialization ... 
if (const auto *TSD = dyn_cast(RD)) { diff --git a/clang/test/CodeGenSYCL/stdtypes_kernel_type.cpp b/clang/test/CodeGenSYCL/stdtypes_kernel_type.cpp new file mode 100644 index 0000000000000..1509a78741292 --- /dev/null +++ b/clang/test/CodeGenSYCL/stdtypes_kernel_type.cpp @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-int-header=%t.h -DCHECK_ERROR -verify %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s +// RUN: FileCheck -input-file=%t.h %s +// +// CHECK: #include +// CHECK-NEXT: #include +// +// CHECK: static constexpr +// CHECK-NEXT: const char* const kernel_names[] = { +// CHECK-NEXT: "_ZTSm", +// CHECK-NEXT: "_ZTSl" +// CHECK-NEXT: }; +// +// CHECK: static constexpr +// CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-NEXT: //--- _ZTSm +// CHECK-EMPTY: +// CHECK-NEXT: //--- _ZTSl +// CHECK-EMPTY: +// CHECK-NEXT: }; +// +// CHECK: static constexpr +// CHECK-NEXT: const unsigned kernel_signature_start[] = { +// CHECK-NEXT: 0, // _ZTSm +// CHECK-NEXT: 1 // _ZTSl +// CHECK-NEXT: }; + +// CHECK: template <> struct KernelInfo { +// CHECK: template <> struct KernelInfo { + +void usage() { +} + +namespace std { +typedef long unsigned int size_t; +typedef long int ptrdiff_t; +typedef decltype(nullptr) nullptr_t; +class T; +class U; +} // namespace std + +template +struct Templated_kernel_name; + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +int main() { +#ifdef CHECK_ERROR + kernel_single_task([=]() {}); // expected-error {{kernel name cannot be a type in the "std" namespace}} + kernel_single_task([=]() {}); // expected-error {{kernel name cannot be a type in the "std" namespace}} + kernel_single_task>([=]() {}); // expected-error {{kernel name cannot be a type in the "std" namespace}} + kernel_single_task>([=]() {}); // expected-error {{kernel name cannot be a type in the "std" namespace}} +#endif + + 
// Although in the std namespace, these resolve to builtins such as `int` that are allowed in kernel names + kernel_single_task([=]() {}); + kernel_single_task([=]() {}); + + return 0; +} diff --git a/clang/test/SemaSYCL/unnamed-kernel.cpp b/clang/test/SemaSYCL/unnamed-kernel.cpp index 540aea29624f5..dc2ef3a94385f 100644 --- a/clang/test/SemaSYCL/unnamed-kernel.cpp +++ b/clang/test/SemaSYCL/unnamed-kernel.cpp @@ -11,6 +11,11 @@ template class KernelName; } +namespace std { +typedef struct { +} max_align_t; +} // namespace std + struct MyWrapper { private: class InvalidKernelName0 {}; @@ -41,7 +46,7 @@ struct MyWrapper { #ifndef __SYCL_UNNAMED_LAMBDA__ // expected-error@+4 {{kernel needs to have a globally-visible name}} - // expected-note@16 {{InvalidKernelName0 declared here}} + // expected-note@21 {{InvalidKernelName0 declared here}} #endif q.submit([&](cl::sycl::handler &h) { h.single_task([] {}); @@ -49,7 +54,7 @@ struct MyWrapper { #ifndef __SYCL_UNNAMED_LAMBDA__ // expected-error@+4 {{kernel needs to have a globally-visible name}} - // expected-note@17 {{InvalidKernelName3 declared here}} + // expected-note@22 {{InvalidKernelName3 declared here}} #endif q.submit([&](cl::sycl::handler &h) { h.single_task>([] {}); @@ -60,10 +65,17 @@ struct MyWrapper { h.single_task([] {}); }); +#ifndef __SYCL_UNNAMED_LAMBDA__ + // expected-error@+3 {{kernel name cannot be a type in the "std" namespace}} +#endif + q.submit([&](cl::sycl::handler &h) { + h.single_task([] {}); + }); + using InvalidAlias = InvalidKernelName4; #ifndef __SYCL_UNNAMED_LAMBDA__ // expected-error@+4 {{kernel needs to have a globally-visible name}} - // expected-note@18 {{InvalidKernelName4 declared here}} + // expected-note@23 {{InvalidKernelName4 declared here}} #endif q.submit([&](cl::sycl::handler &h) { h.single_task([] {}); @@ -72,7 +84,7 @@ struct MyWrapper { using InvalidAlias1 = InvalidKernelName5; #ifndef __SYCL_UNNAMED_LAMBDA__ // expected-error@+4 {{kernel needs to have a globally-visible 
name}} - // expected-note@19 {{InvalidKernelName5 declared here}} + // expected-note@24 {{InvalidKernelName5 declared here}} #endif q.submit([&](cl::sycl::handler &h) { h.single_task>([] {}); diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp index d0320701010f1..b9d05701113d4 100644 --- a/sycl/test/group-algorithm/broadcast.cpp +++ b/sycl/test/group-algorithm/broadcast.cpp @@ -15,14 +15,11 @@ using namespace sycl; using namespace sycl::ONEAPI; -template -class broadcast_kernel; - -template +template void test(queue q, InputContainer input, OutputContainer output) { typedef typename InputContainer::value_type InputT; typedef typename OutputContainer::value_type OutputT; - typedef class broadcast_kernel kernel_name; size_t N = input.size(); size_t G = 4; range<2> R(G, G); @@ -63,7 +60,7 @@ int main() { std::array output; std::iota(input.begin(), input.end(), 1); std::fill(output.begin(), output.end(), false); - test(q, input, output); + test(q, input, output); } // Test pointer type @@ -74,7 +71,7 @@ int main() { input[i] = static_cast(0x0) + i; } std::fill(output.begin(), output.end(), static_cast(0x0)); - test(q, input, output); + test(q, input, output); } // Test user-defined type @@ -88,7 +85,7 @@ int main() { std::complex(0, 1) + (float)i * std::complex(2, 2); } std::fill(output.begin(), output.end(), std::complex(0, 0)); - test(q, input, output); + test(q, input, output); } { std::array, N> input; @@ -98,7 +95,7 @@ int main() { std::complex(0, 1) + (double)i * std::complex(2, 2); } std::fill(output.begin(), output.end(), std::complex(0, 0)); - test(q, input, output); + test(q, input, output); } std::cout << "Test passed." 
<< std::endl; } diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp index 1b831a18c0073..b18a16af1b816 100644 --- a/sycl/test/group-algorithm/exclusive_scan.cpp +++ b/sycl/test/group-algorithm/exclusive_scan.cpp @@ -24,7 +24,7 @@ using namespace sycl; using namespace sycl::ONEAPI; -template +template class exclusive_scan_kernel; // std::exclusive_scan isn't implemented yet, so use serial implementation @@ -44,17 +44,17 @@ OutputIterator exclusive_scan(InputIterator first, InputIterator last, } } // namespace emu -template +template void test(queue q, InputContainer input, OutputContainer output, BinaryOperation binary_op, typename OutputContainer::value_type identity) { typedef typename InputContainer::value_type InputT; typedef typename OutputContainer::value_type OutputT; - typedef class exclusive_scan_kernel kernel_name0; - typedef class exclusive_scan_kernel kernel_name1; - typedef class exclusive_scan_kernel kernel_name2; - typedef class exclusive_scan_kernel kernel_name3; + typedef class exclusive_scan_kernel kernel_name0; + typedef class exclusive_scan_kernel kernel_name1; + typedef class exclusive_scan_kernel kernel_name2; + typedef class exclusive_scan_kernel kernel_name3; OutputT init = 42; size_t N = input.size(); size_t G = 16; @@ -159,19 +159,24 @@ int main() { std::iota(input.begin(), input.end(), 0); std::fill(output.begin(), output.end(), 0); - test(q, input, output, plus<>(), 0); - test(q, input, output, minimum<>(), std::numeric_limits::max()); - test(q, input, output, maximum<>(), std::numeric_limits::lowest()); + test(q, input, output, plus<>(), 0); + test(q, input, output, minimum<>(), + std::numeric_limits::max()); + test(q, input, output, maximum<>(), + std::numeric_limits::lowest()); - test(q, input, output, plus(), 0); - test(q, input, output, minimum(), std::numeric_limits::max()); - test(q, input, output, maximum(), std::numeric_limits::lowest()); + test(q, input, output, plus(), 0); + 
test(q, input, output, minimum(), + std::numeric_limits::max()); + test(q, input, output, maximum(), + std::numeric_limits::lowest()); #ifdef SPIRV_1_3 - test(q, input, output, multiplies(), 1); - test(q, input, output, bit_or(), 0); - test(q, input, output, bit_xor(), 0); - test(q, input, output, bit_and(), ~0); + test(q, input, output, multiplies(), + 1); + test(q, input, output, bit_or(), 0); + test(q, input, output, bit_xor(), 0); + test(q, input, output, bit_and(), ~0); #endif // SPIRV_1_3 std::cout << "Test passed." << std::endl; diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp index e6fddd1c2d4aa..ea398bf2d0273 100644 --- a/sycl/test/group-algorithm/inclusive_scan.cpp +++ b/sycl/test/group-algorithm/inclusive_scan.cpp @@ -24,7 +24,7 @@ using namespace sycl; using namespace sycl::ONEAPI; -template +template class inclusive_scan_kernel; // std::inclusive_scan isn't implemented yet, so use serial implementation @@ -44,17 +44,17 @@ OutputIterator inclusive_scan(InputIterator first, InputIterator last, } } // namespace emu -template +template void test(queue q, InputContainer input, OutputContainer output, BinaryOperation binary_op, typename OutputContainer::value_type identity) { typedef typename InputContainer::value_type InputT; typedef typename OutputContainer::value_type OutputT; - typedef class inclusive_scan_kernel kernel_name0; - typedef class inclusive_scan_kernel kernel_name1; - typedef class inclusive_scan_kernel kernel_name2; - typedef class inclusive_scan_kernel kernel_name3; + typedef class inclusive_scan_kernel kernel_name0; + typedef class inclusive_scan_kernel kernel_name1; + typedef class inclusive_scan_kernel kernel_name2; + typedef class inclusive_scan_kernel kernel_name3; OutputT init = 42; size_t N = input.size(); size_t G = 16; @@ -159,19 +159,25 @@ int main() { std::iota(input.begin(), input.end(), 0); std::fill(output.begin(), output.end(), 0); - test(q, input, output, plus<>(), 0); 
- test(q, input, output, minimum<>(), std::numeric_limits::max()); - test(q, input, output, maximum<>(), std::numeric_limits::lowest()); + test(q, input, output, plus<>(), 0); + test(q, input, output, minimum<>(), + std::numeric_limits::max()); + test(q, input, output, maximum<>(), + std::numeric_limits::lowest()); - test(q, input, output, plus(), 0); - test(q, input, output, minimum(), std::numeric_limits::max()); - test(q, input, output, maximum(), std::numeric_limits::lowest()); + test(q, input, output, plus(), 0); + test(q, input, output, minimum(), + std::numeric_limits::max()); + test(q, input, output, maximum(), + std::numeric_limits::lowest()); #ifdef SPIRV_1_3 - test(q, input, output, multiplies(), 1); - test(q, input, output, bit_or(), 0); - test(q, input, output, bit_xor(), 0); - test(q, input, output, bit_and(), ~0); + test(q, input, output, + multiplies(), 1); + test(q, input, output, bit_or(), 0); + test(q, input, output, bit_xor(), + 0); + test(q, input, output, bit_and(), ~0); #endif // SPIRV_1_3 std::cout << "Test passed." 
<< std::endl; diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp index 010a0ce75b2b4..708de23653d2d 100644 --- a/sycl/test/group-algorithm/reduce.cpp +++ b/sycl/test/group-algorithm/reduce.cpp @@ -23,17 +23,13 @@ using namespace sycl; using namespace sycl::ONEAPI; -template -class reduce_kernel; - -template +template void test(queue q, InputContainer input, OutputContainer output, BinaryOperation binary_op, typename OutputContainer::value_type identity) { typedef typename InputContainer::value_type InputT; typedef typename OutputContainer::value_type OutputT; - typedef class reduce_kernel kernel_name; OutputT init = 42; size_t N = input.size(); size_t G = 16; @@ -44,15 +40,17 @@ void test(queue q, InputContainer input, OutputContainer output, q.submit([&](handler &cgh) { auto in = in_buf.template get_access(cgh); auto out = out_buf.template get_access(cgh); - cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { - group<1> g = it.get_group(); - int lid = it.get_local_id(0); - out[0] = reduce(g, in[lid], binary_op); - out[1] = reduce(g, in[lid], init, binary_op); - out[2] = reduce(g, in.get_pointer(), in.get_pointer() + N, binary_op); - out[3] = - reduce(g, in.get_pointer(), in.get_pointer() + N, init, binary_op); - }); + cgh.parallel_for( + nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[0] = reduce(g, in[lid], binary_op); + out[1] = reduce(g, in[lid], init, binary_op); + out[2] = + reduce(g, in.get_pointer(), in.get_pointer() + N, binary_op); + out[3] = reduce(g, in.get_pointer(), in.get_pointer() + N, init, + binary_op); + }); }); } // std::reduce is not implemented yet, so use std::accumulate instead @@ -97,19 +95,24 @@ int main() { std::iota(input.begin(), input.end(), 0); std::fill(output.begin(), output.end(), 0); - test(q, input, output, plus<>(), 0); - test(q, input, output, minimum<>(), std::numeric_limits::max()); - test(q, input, output, maximum<>(), 
std::numeric_limits::lowest()); + test(q, input, output, plus<>(), 0); + test(q, input, output, minimum<>(), + std::numeric_limits::max()); + test(q, input, output, maximum<>(), + std::numeric_limits::lowest()); - test(q, input, output, plus(), 0); - test(q, input, output, minimum(), std::numeric_limits::max()); - test(q, input, output, maximum(), std::numeric_limits::lowest()); + test(q, input, output, plus(), 0); + test(q, input, output, minimum(), + std::numeric_limits::max()); + test(q, input, output, maximum(), + std::numeric_limits::lowest()); #ifdef SPIRV_1_3 - test(q, input, output, multiplies(), 1); - test(q, input, output, bit_or(), 0); - test(q, input, output, bit_xor(), 0); - test(q, input, output, bit_and(), ~0); + test(q, input, output, + multiplies(), 1); + test(q, input, output, bit_or(), 0); + test(q, input, output, bit_xor(), 0); + test(q, input, output, bit_and(), ~0); #endif // SPIRV_1_3 std::cout << "Test passed." << std::endl; diff --git a/sycl/test/reduction/reduction_ctor.cpp b/sycl/test/reduction/reduction_ctor.cpp index 4828b1dc94535..c10fe74d7643c 100644 --- a/sycl/test/reduction/reduction_ctor.cpp +++ b/sycl/test/reduction/reduction_ctor.cpp @@ -10,7 +10,6 @@ using namespace cl::sycl; - template void test_reducer(Reduction &Redu, T A, T B) { typename Reduction::reducer_type Reducer; @@ -34,12 +33,10 @@ void test_reducer(Reduction &Redu, T Identity, BinaryOperation BOp, T A, T B) { "Wrong result of binary operation."); } -template -class Known; -template -class Unknown; +template class KernelNameGroup; -template +template void testKnown(T Identity, BinaryOperation BOp, T A, T B) { buffer ReduBuf(1); @@ -50,17 +47,17 @@ void testKnown(T Identity, BinaryOperation BOp, T A, T B) { accessor ReduAcc(ReduBuf, CGH); auto Redu = ONEAPI::reduction(ReduAcc, BOp); - assert(Redu.getIdentity() == Identity && - "Failed getIdentity() check()."); + assert(Redu.getIdentity() == Identity && "Failed getIdentity() check()."); test_reducer(Redu, A, B); 
test_reducer(Redu, Identity, BOp, A, B); // Command group must have at least one task in it. Use an empty one. - CGH.single_task>([=]() {}); + CGH.single_task([=]() {}); }); } -template +template void testUnknown(T Identity, BinaryOperation BOp, T A, T B) { buffer ReduBuf(1); queue Q; @@ -70,49 +67,61 @@ void testUnknown(T Identity, BinaryOperation BOp, T A, T B) { accessor ReduAcc(ReduBuf, CGH); auto Redu = ONEAPI::reduction(ReduAcc, Identity, BOp); - assert(Redu.getIdentity() == Identity && - "Failed getIdentity() check()."); + assert(Redu.getIdentity() == Identity && "Failed getIdentity() check()."); test_reducer(Redu, Identity, BOp, A, B); // Command group must have at least one task in it. Use an empty one. - CGH.single_task([=]() {}); + CGH.single_task([=]() {}); }); } -template +template void testBoth(T Identity, BinaryOperation BOp, T A, T B) { - testKnown(Identity, BOp, A, B); - testKnown(Identity, BOp, A, B); - testUnknown>(Identity, BOp, A, B); - testUnknown>(Identity, BOp, A, B); + testKnown, + T, 0>(Identity, BOp, A, B); + testKnown< + KernelNameGroup, + T, 1>(Identity, BOp, A, B); + testUnknown< + KernelNameGroup, T, + 0>(Identity, BOp, A, B); + testUnknown, + T, 1>(Identity, BOp, A, B); } int main() { - testBoth(0, ONEAPI::plus(), 1, 7); - testBoth(1, std::multiplies(), 1, 7); - testBoth(0, ONEAPI::bit_or(), 1, 8); - testBoth(0, ONEAPI::bit_xor(), 7, 3); - testBoth(~0, ONEAPI::bit_and(), 7, 3); - testBoth((std::numeric_limits::max)(), ONEAPI::minimum(), 7, - 3); - testBoth((std::numeric_limits::min)(), ONEAPI::maximum(), 7, - 3); - - testBoth(0, ONEAPI::plus(), 1, 7); - testBoth(1, std::multiplies(), 1, 7); - testBoth(getMaximumFPValue(), ONEAPI::minimum(), 7, 3); - testBoth(getMinimumFPValue(), ONEAPI::maximum(), 7, 3); - - testUnknown, 0, - Unknown, 0, CustomVecPlus>>( - CustomVec(0), CustomVecPlus(), CustomVec(1), - CustomVec(7)); - testUnknown, 1, - Unknown, 1, CustomVecPlus>>( + testBoth(0, ONEAPI::plus(), 1, + 7); + testBoth(1, 
std::multiplies(), 1, 7); + testBoth(0, ONEAPI::bit_or(), + 1, 8); + testBoth(0, ONEAPI::bit_xor(), + 7, 3); + testBoth(~0, ONEAPI::bit_and(), 7, + 3); + testBoth( + (std::numeric_limits::max)(), ONEAPI::minimum(), 7, 3); + testBoth((std::numeric_limits::min)(), + ONEAPI::maximum(), 7, 3); + + testBoth(0, ONEAPI::plus(), 1, + 7); + testBoth( + 1, std::multiplies(), 1, 7); + testBoth( + getMaximumFPValue(), ONEAPI::minimum(), 7, 3); + testBoth(getMinimumFPValue(), + ONEAPI::maximum(), 7, 3); + + testUnknown, 0, + CustomVecPlus>(CustomVec(0), CustomVecPlus(), + CustomVec(1), CustomVec(7)); + testUnknown, 1>( CustomVec(0), CustomVecPlus(), CustomVec(1), CustomVec(7)); - testUnknown( + testUnknown( 0, [](auto a, auto b) { return a | b; }, 1, 8); std::cout << "Test passed\n"; diff --git a/sycl/test/reduction/reduction_nd_conditional.cpp b/sycl/test/reduction/reduction_nd_conditional.cpp index 985f77cbc77f4..9db6fac910809 100644 --- a/sycl/test/reduction/reduction_nd_conditional.cpp +++ b/sycl/test/reduction/reduction_nd_conditional.cpp @@ -35,25 +35,16 @@ void initInputData(buffer &InBuf, T &ExpectedOut, T Identity, } }; -template -class SomeClass; - -template -struct Vec { +template struct Vec { Vec() : X(0), Y(0) {} Vec(T X, T Y) : X(X), Y(Y) {} Vec(T V) : X(V), Y(V) {} - bool operator==(const Vec &P) const { - return P.X == X && P.Y == Y; - } - bool operator!=(const Vec &P) const { - return !(*this == P); - } + bool operator==(const Vec &P) const { return P.X == X && P.Y == Y; } + bool operator!=(const Vec &P) const { return !(*this == P); } T X; T Y; }; -template -bool operator==(const Vec &A, const Vec &B) { +template bool operator==(const Vec &A, const Vec &B) { return A.X == B.X && A.Y == B.Y; } template @@ -61,15 +52,13 @@ std::ostream &operator<<(std::ostream &OS, const Vec &P) { return OS << "(" << P.X << ", " << P.Y << ")"; } -template -struct VecPlus { +template struct VecPlus { using P = Vec; - P operator()(const P &A, const P &B) const { - return P(A.X + 
B.X, A.Y + B.Y); - } + P operator()(const P &A, const P &B) const { return P(A.X + B.X, A.Y + B.Y); } }; -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -90,7 +79,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { size_t I = NDIt.get_global_linear_id(); if (I < 2) @@ -114,10 +103,11 @@ void test(T Identity, size_t WGSize, size_t NWItems) { } int main() { - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 2, 64); - test>(0, 16, 256); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 2, + 64); + test>(0, 16, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_ext_type.hpp b/sycl/test/reduction/reduction_nd_ext_type.hpp index f81a913837d46..4cb9046092c82 100644 --- a/sycl/test/reduction/reduction_nd_ext_type.hpp +++ b/sycl/test/reduction/reduction_nd_ext_type.hpp @@ -8,10 +8,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -28,14 +26,13 @@ void test(T Identity, size_t WGSize, size_t NWItems) { queue Q; Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); - accessor - Out(OutBuf, CGH); + accessor Out(OutBuf, CGH); auto Redu = ONEAPI::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -44,18 +41,18 @@ void test(T Identity, size_t WGSize, size_t NWItems) { // Check correctness. 
auto Out = OutBuf.template get_access(); T ComputedOut = *(Out.get_pointer()); - T MaxDiff = 3 * std::numeric_limits::epsilon() * std::fabs(ComputedOut + CorrectOut); + T MaxDiff = 3 * std::numeric_limits::epsilon() * + std::fabs(ComputedOut + CorrectOut); if (std::fabs(static_cast(ComputedOut - CorrectOut)) > MaxDiff) { std::cout << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; std::cout << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut - << ", MaxDiff = " << MaxDiff << "\n"; + << ", Expected value: " << CorrectOut << ", MaxDiff = " << MaxDiff + << "\n"; assert(0 && "Wrong value."); } } -template -int runTests(const string_class &ExtensionName) { +template int runTests(const string_class &ExtensionName) { device D = default_selector().select_device(); if (!D.is_host() && !D.has_extension(ExtensionName)) { std::cout << "Test skipped\n"; @@ -63,20 +60,23 @@ int runTests(const string_class &ExtensionName) { } // Check some less standards WG sizes and corner cases first. 
- test>(0, 4, 4); - test>(0, 4, 64); + test>(0, 4, 4); + test>(0, 4, 64); - test>( - getMaximumFPValue(), 7, 7); - test>( - getMinimumFPValue(), 7, 7 * 5); + test>(getMaximumFPValue(), 7, 7); + test>(getMinimumFPValue(), 7, 7 * 5); #if __cplusplus >= 201402L - test>(1, 3, 3 * 5); - test>( - getMaximumFPValue(), 3, 3); - test>( - getMinimumFPValue(), 3, 3); + test>(1, 3, 3 * 5); + test>(getMaximumFPValue(), 3, 3); + test>(getMinimumFPValue(), 3, 3); #endif // __cplusplus >= 201402L std::cout << "Test passed\n"; diff --git a/sycl/test/reduction/reduction_nd_s0_dw.cpp b/sycl/test/reduction/reduction_nd_s0_dw.cpp index 8b900a3a1fd9a..0a4a9032a3b30 100644 --- a/sycl/test/reduction/reduction_nd_s0_dw.cpp +++ b/sycl/test/reduction/reduction_nd_s0_dw.cpp @@ -16,10 +16,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -40,7 +38,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -59,36 +57,44 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, 64); + test>(0, 8, + 128); + test>(0, 16, + 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, 256); // Check with various operations. 
- test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(1, 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>( + (std::numeric_limits::max)(), 8, 256); + test>( + (std::numeric_limits::min)(), 8, 256); // Check with various types. - test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(1, 8, + 256); + test>( + getMaximumFPValue(), 8, 256); + test>( + getMinimumFPValue(), 8, 256); // Check with CUSTOM type. - test, 0, CustomVecPlus>(CustomVec(0), 8, 256); + test, 0, + CustomVecPlus>(CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_s0_rw.cpp b/sycl/test/reduction/reduction_nd_s0_rw.cpp index 029458942390a..d346016ae4bfd 100644 --- a/sycl/test/reduction/reduction_nd_s0_rw.cpp +++ b/sycl/test/reduction/reduction_nd_s0_rw.cpp @@ -16,10 +16,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -42,7 +40,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -61,36 +59,47 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, + 49 * 5); // Try some power-of-two work-group sizes. 
- test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, + 64); + test>(0, 8, + 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, + 256); // Check with various operations. - test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(1, 8, 256); + test>(0, 8, + 256); + test>(0, 8, + 256); + test>(~0, 8, 256); + test>( + (std::numeric_limits::max)(), 8, 256); + test>( + (std::numeric_limits::min)(), 8, 256); // Check with various types. - test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(1, 8, 256); + test>( + getMaximumFPValue(), 8, 256); + test>( + getMinimumFPValue(), 8, 256); // Check with CUSTOM type. - test, 0, CustomVecPlus>(CustomVec(0), 8, 256); + test, 0, + CustomVecPlus>(CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_s1_dw.cpp b/sycl/test/reduction/reduction_nd_s1_dw.cpp index 7cc975e261dc2..df31961a1c908 100644 --- a/sycl/test/reduction/reduction_nd_s1_dw.cpp +++ b/sycl/test/reduction/reduction_nd_s1_dw.cpp @@ -17,10 +17,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -41,7 +39,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -60,36 +58,48 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG 
sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, + 2); + test>(0, 7, + 7); + test>(0, 9, + 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, + 64); + test>(0, 4, 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, + 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, + 256); // Check with various operations. - test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(1, 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>( + (std::numeric_limits::max)(), 8, 256); + test>( + (std::numeric_limits::min)(), 8, 256); // Check with various types. - test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(1, 8, + 256); + test>( + getMaximumFPValue(), 8, 256); + test>( + getMinimumFPValue(), 8, 256); // Check with CUSTOM type. 
- test, 1, CustomVecPlus>(CustomVec(0), 8, 256); + test, 1, + CustomVecPlus>(CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_s1_rw.cpp b/sycl/test/reduction/reduction_nd_s1_rw.cpp index 2c8f6a8343e83..ec7044f1c12f8 100644 --- a/sycl/test/reduction/reduction_nd_s1_rw.cpp +++ b/sycl/test/reduction/reduction_nd_s1_rw.cpp @@ -17,10 +17,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -43,7 +41,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -62,36 +60,45 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, + 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, + 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, + 256); + test>(0, 128, + 256); + test>(0, 256, 256); // Check with various operations. - test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(1, 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>( + (std::numeric_limits::max)(), 8, 256); + test>( + (std::numeric_limits::min)(), 8, 256); // Check with various types. 
- test>(1, 8, 256); - test>(getMaximumFPValue(), 1, 16); - test>(getMinimumFPValue(), 8, 256); + test>(1, 8, 256); + test>( + getMaximumFPValue(), 1, 16); + test>( + getMinimumFPValue(), 8, 256); // Check with CUSTOM type. - test, 1, CustomVecPlus>(CustomVec(0), 8, 256); + test, 1, + CustomVecPlus>(CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_placeholder.cpp b/sycl/test/reduction/reduction_placeholder.cpp index 77633992ea2df..b0ce9ddfa88b8 100644 --- a/sycl/test/reduction/reduction_placeholder.cpp +++ b/sycl/test/reduction/reduction_placeholder.cpp @@ -19,10 +19,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { // Initialize. T CorrectOut; @@ -34,9 +32,9 @@ void test(T Identity, size_t WGSize, size_t NWItems) { (OutBuf.template get_access())[0] = Identity; - auto Out = accessor(OutBuf); + auto Out = + accessor(OutBuf); // Compute. queue Q; Q.submit([&](handler &CGH) { @@ -46,7 +44,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -65,21 +63,26 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // fast atomics and fast reduce - test>(0, 49, 49 * 5); - test>(0, 8, 8); + test>(0, 49, 49 * 5); + test>(0, 8, + 8); // fast atomics - test>(0, 7, 7 * 3); - test>(0, 4, 128); + test>(0, 7, + 7 * 3); + test>(0, 4, 128); // fast reduce - test>(getMaximumFPValue(), 5, 5 * 7); - test>(getMinimumFPValue(), 4, 128); + test>( + getMaximumFPValue(), 5, 5 * 7); + test>( + getMinimumFPValue(), 4, 128); // generic algorithm - test>(1, 7, 7 * 5); - test>(1, 8, 16); - test, 0, CustomVecPlus>(CustomVec(0), 8, 8 * 3); + test>(1, 7, 7 * 5); + test>(1, 8, 16); + test, 0, + 
CustomVecPlus>(CustomVec(0), 8, 8 * 3); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_transparent.cpp b/sycl/test/reduction/reduction_transparent.cpp index dea789b395401..5c7608b8930ab 100644 --- a/sycl/test/reduction/reduction_transparent.cpp +++ b/sycl/test/reduction/reduction_transparent.cpp @@ -19,14 +19,12 @@ using namespace cl::sycl; -template -class SomeIdClass; -template -class SomeNoIdClass; +template class KernelNameGroup; // Checks reductions initialized with transparent functor and explicitly set // identity value. -template +template void testId(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -46,7 +44,7 @@ void testId(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, ONEAPI::reduction(Out, Identity, BOp), [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); @@ -67,7 +65,8 @@ void testId(T Identity, size_t WGSize, size_t NWItems) { // Checks reductions initialized with transparent functor and identity // value not explicitly specified. The parameter 'Identity' is passed here // only to pre-initialize input data correctly. 
-template +template void testNoId(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -87,7 +86,7 @@ void testNoId(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, ONEAPI::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -104,18 +103,26 @@ void testNoId(T Identity, size_t WGSize, size_t NWItems) { } } -template +template void test(T Identity, size_t WGSize, size_t NWItems) { - testId(Identity, WGSize, NWItems); - testNoId(Identity, WGSize, NWItems); + testId, + T, Dim, BinaryOperation>(Identity, WGSize, NWItems); + testNoId, + T, Dim, BinaryOperation>(Identity, WGSize, NWItems); } int main() { #if __cplusplus >= 201402L - test>(getMinimumFPValue(), 7, 7 * 5); - test>(0, 7, 49); - test>(1, 4, 16); - test>(0, 1, 512 + 32); + test>( + getMinimumFPValue(), 7, 7 * 5); + test>(0, 7, 49); + test>(1, 4, 16); + test>( + 0, 1, 512 + 32); #endif // __cplusplus >= 201402L std::cout << "Test passed\n"; diff --git a/sycl/test/reduction/reduction_usm.cpp b/sycl/test/reduction/reduction_usm.cpp index 7b199c895f5bb..35c0ccb11f506 100644 --- a/sycl/test/reduction/reduction_usm.cpp +++ b/sycl/test/reduction/reduction_usm.cpp @@ -20,12 +20,10 @@ using namespace cl::sycl; -template -class SomeClass; -template -class Copy1; +template class KernelNameGroup; -template +template void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) { queue Q; auto Dev = Q.get_device(); @@ -45,9 +43,9 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) { return; if (AllocType == usm::alloc::device) { event E = Q.submit([&](handler &CGH) { - CGH.single_task>([=]() { - *ReduVarPtr = Identity; - }); + CGH.single_task>( + [=]() { *ReduVarPtr = Identity; }); }); E.wait(); } else { @@ -68,7 +66,8 @@ void test(T Identity, 
size_t WGSize, size_t NWItems, usm::alloc AllocType) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for>( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -98,32 +97,41 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) { free(ReduVarPtr, Q.get_context()); } -template +template void testUSM(T Identity, size_t WGSize, size_t NWItems) { - test(Identity, WGSize, NWItems, usm::alloc::shared); - test(Identity, WGSize, NWItems, usm::alloc::host); - test(Identity, WGSize, NWItems, usm::alloc::device); + test, T, Dim, + BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::shared); + test, T, + Dim, BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::host); + test, T, + Dim, BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::device); } int main() { // fast atomics and fast reduce - testUSM>(0, 49, 49 * 5); - testUSM>(0, 8, 128); + testUSM>(0, 49, + 49 * 5); + testUSM>(0, 8, 128); // fast atomics - testUSM>(0, 7, 7 * 3); - testUSM>(0, 4, 128); + testUSM>(0, 7, + 7 * 3); + testUSM>(0, 4, + 128); // fast reduce - testUSM>(getMaximumFPValue(), 5, - 5 * 7); - testUSM>(getMinimumFPValue(), 4, 128); + testUSM>( + getMaximumFPValue(), 5, 5 * 7); + testUSM>( + getMinimumFPValue(), 4, 128); // generic algorithm - testUSM>(1, 7, 7 * 5); - testUSM>(1, 8, 16); - testUSM, 0, CustomVecPlus>( - CustomVec(0), 8, 8 * 3); + testUSM>(1, 7, 7 * 5); + testUSM>( + 1, 8, 16); + testUSM, 0, + CustomVecPlus>(CustomVec(0), 8, 8 * 3); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/sub_group/generic-shuffle.cpp b/sycl/test/sub_group/generic-shuffle.cpp index b8bb972d646a2..e6825750925fc 100644 --- a/sycl/test/sub_group/generic-shuffle.cpp +++ b/sycl/test/sub_group/generic-shuffle.cpp @@ -22,7 +22,7 @@ template class pointer_kernel; using namespace cl::sycl; -template +template void check_pointer(queue 
&Queue, size_t G = 256, size_t L = 64) { try { nd_range<1> NdRange(G, L); @@ -39,29 +39,30 @@ void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) { auto acc_xor = buf_xor.template get_access(cgh); auto sgsizeacc = sgsizebuf.get_access(cgh); - cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - ONEAPI::sub_group SG = NdItem.get_sub_group(); - uint32_t wggid = NdItem.get_global_id(0); - uint32_t sgid = SG.get_group_id().get(0); - if (wggid == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; + cgh.parallel_for( + NdRange, [=](nd_item<1> NdItem) { + ONEAPI::sub_group SG = NdItem.get_sub_group(); + uint32_t wggid = NdItem.get_global_id(0); + uint32_t sgid = SG.get_group_id().get(0); + if (wggid == 0) + sgsizeacc[0] = SG.get_max_local_range()[0]; - T *ptr = static_cast(0x0) + wggid; + T *ptr = static_cast(0x0) + wggid; - /*GID of middle element in every subgroup*/ - acc[NdItem.get_global_id()] = - SG.shuffle(ptr, SG.get_max_local_range()[0] / 2); + /*GID of middle element in every subgroup*/ + acc[NdItem.get_global_id()] = + SG.shuffle(ptr, SG.get_max_local_range()[0] / 2); - /* Save GID-SGID */ - acc_up[NdItem.get_global_id()] = SG.shuffle_up(ptr, sgid); + /* Save GID-SGID */ + acc_up[NdItem.get_global_id()] = SG.shuffle_up(ptr, sgid); - /* Save GID+SGID */ - acc_down[NdItem.get_global_id()] = SG.shuffle_down(ptr, sgid); + /* Save GID+SGID */ + acc_down[NdItem.get_global_id()] = SG.shuffle_down(ptr, sgid); - /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - acc_xor[NdItem.get_global_id()] = - SG.shuffle_xor(ptr, sgid % SG.get_max_local_range()[0]); - }); + /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ + acc_xor[NdItem.get_global_id()] = + SG.shuffle_xor(ptr, sgid % SG.get_max_local_range()[0]); + }); }); auto acc = buf.template get_access(); auto acc_up = buf_up.template get_access(); @@ -116,7 +117,7 @@ void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) { } } -template +template void check_struct(queue &Queue, 
Generator &Gen, size_t G = 256, size_t L = 64) { // Fill a vector with values that will be shuffled @@ -140,29 +141,30 @@ void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) { auto sgsizeacc = sgsizebuf.get_access(cgh); auto in = buf_in.template get_access(cgh); - cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - ONEAPI::sub_group SG = NdItem.get_sub_group(); - uint32_t wggid = NdItem.get_global_id(0); - uint32_t sgid = SG.get_group_id().get(0); - if (wggid == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; + cgh.parallel_for( + NdRange, [=](nd_item<1> NdItem) { + ONEAPI::sub_group SG = NdItem.get_sub_group(); + uint32_t wggid = NdItem.get_global_id(0); + uint32_t sgid = SG.get_group_id().get(0); + if (wggid == 0) + sgsizeacc[0] = SG.get_max_local_range()[0]; - T val = in[wggid]; + T val = in[wggid]; - /*GID of middle element in every subgroup*/ - acc[NdItem.get_global_id()] = - SG.shuffle(val, SG.get_max_local_range()[0] / 2); + /*GID of middle element in every subgroup*/ + acc[NdItem.get_global_id()] = + SG.shuffle(val, SG.get_max_local_range()[0] / 2); - /* Save GID-SGID */ - acc_up[NdItem.get_global_id()] = SG.shuffle_up(val, sgid); + /* Save GID-SGID */ + acc_up[NdItem.get_global_id()] = SG.shuffle_up(val, sgid); - /* Save GID+SGID */ - acc_down[NdItem.get_global_id()] = SG.shuffle_down(val, sgid); + /* Save GID+SGID */ + acc_down[NdItem.get_global_id()] = SG.shuffle_down(val, sgid); - /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - acc_xor[NdItem.get_global_id()] = - SG.shuffle_xor(val, sgid % SG.get_max_local_range()[0]); - }); + /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ + acc_xor[NdItem.get_global_id()] = + SG.shuffle_xor(val, sgid % SG.get_max_local_range()[0]); + }); }); auto acc = buf.template get_access(); auto acc_up = buf_up.template get_access(); @@ -220,18 +222,20 @@ int main() { } // Test shuffle of pointer types - check_pointer(Queue); + check_pointer(Queue); // Test shuffle of 
non-native types auto ComplexFloatGenerator = [state = std::complex(0, 1)]() mutable { return state += std::complex(2, 2); }; - check_struct>(Queue, ComplexFloatGenerator); + check_struct>( + Queue, ComplexFloatGenerator); auto ComplexDoubleGenerator = [state = std::complex(0, 1)]() mutable { return state += std::complex(2, 2); }; - check_struct>(Queue, ComplexDoubleGenerator); + check_struct>( + Queue, ComplexDoubleGenerator); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test/sub_group/reduce.cpp b/sycl/test/sub_group/reduce.cpp index f7a324f10b38c..6644516f52158 100644 --- a/sycl/test/sub_group/reduce.cpp +++ b/sycl/test/sub_group/reduce.cpp @@ -24,11 +24,11 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; } diff --git a/sycl/test/sub_group/reduce.hpp b/sycl/test/sub_group/reduce.hpp index 66a737a95c8c4..f606dcf5e9898 100644 --- a/sycl/test/sub_group/reduce.hpp +++ b/sycl/test/sub_group/reduce.hpp @@ -9,11 +9,11 @@ #include "helper.hpp" #include -template class sycl_subgr; +template class sycl_subgr; using namespace cl::sycl; -template +template void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, size_t G = 256, size_t L = 64) { try { @@ -23,7 +23,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, Queue.submit([&](handler &cgh) { auto sgsizeacc = sgsizebuf.get_access(cgh); auto acc = buf.template get_access(cgh); - cgh.parallel_for>( + cgh.parallel_for( NdRange, [=](nd_item<1> NdItem) { ONEAPI::sub_group sg = NdItem.get_sub_group(); if (skip_init) { @@ -64,30 +64,48 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, } } -template void check(queue &Queue, size_t G = 256, size_t L = 64) { +template +void check(queue &Queue, 
size_t G = 256, size_t L = 64) { // limit data range for half to avoid rounding issues if (std::is_same::value) { G = 64; L = 32; } - check_op(Queue, T(L), ONEAPI::plus(), false, G, L); - check_op(Queue, T(0), ONEAPI::plus(), true, G, L); + check_op< + sycl_subgr, T>( + Queue, T(L), ONEAPI::plus(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::plus(), true, G, L); - check_op(Queue, T(0), ONEAPI::minimum(), false, G, L); - check_op(Queue, T(G), ONEAPI::minimum(), true, G, L); + check_op, + T>(Queue, T(0), ONEAPI::minimum(), false, G, L); + check_op, + T>(Queue, T(G), ONEAPI::minimum(), true, G, L); - check_op(Queue, T(G), ONEAPI::maximum(), false, G, L); - check_op(Queue, T(0), ONEAPI::maximum(), true, G, L); + check_op, + T>(Queue, T(G), ONEAPI::maximum(), false, G, L); + check_op, + T>(Queue, T(0), ONEAPI::maximum(), true, G, L); #if __cplusplus >= 201402L - check_op(Queue, T(L), ONEAPI::plus<>(), false, G, L); - check_op(Queue, T(0), ONEAPI::plus<>(), true, G, L); + check_op, + T>(Queue, T(L), ONEAPI::plus<>(), false, G, L); + check_op, + T>(Queue, T(0), ONEAPI::plus<>(), true, G, L); - check_op(Queue, T(0), ONEAPI::minimum<>(), false, G, L); - check_op(Queue, T(G), ONEAPI::minimum<>(), true, G, L); + check_op, + T>(Queue, T(0), ONEAPI::minimum<>(), false, G, L); + check_op, + T>(Queue, T(G), ONEAPI::minimum<>(), true, G, L); - check_op(Queue, T(G), ONEAPI::maximum<>(), false, G, L); - check_op(Queue, T(0), ONEAPI::maximum<>(), true, G, L); + check_op, + T>(Queue, T(G), ONEAPI::maximum<>(), false, G, L); + check_op< + sycl_subgr, + T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L); #endif } diff --git a/sycl/test/sub_group/reduce_fp16.cpp b/sycl/test/sub_group/reduce_fp16.cpp index 17bd8507a7c90..dfe9299bfcd9e 100644 --- a/sycl/test/sub_group/reduce_fp16.cpp +++ b/sycl/test/sub_group/reduce_fp16.cpp @@ -19,7 +19,7 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); + check(Queue); std::cout << "Test passed." 
<< std::endl; return 0; } diff --git a/sycl/test/sub_group/reduce_fp64.cpp b/sycl/test/sub_group/reduce_fp64.cpp index 1af1ae040e02e..958052e979324 100644 --- a/sycl/test/sub_group/reduce_fp64.cpp +++ b/sycl/test/sub_group/reduce_fp64.cpp @@ -24,7 +24,7 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; } diff --git a/sycl/test/sub_group/scan.cpp b/sycl/test/sub_group/scan.cpp index 32df9d63e9faa..03d3f2ab90f53 100644 --- a/sycl/test/sub_group/scan.cpp +++ b/sycl/test/sub_group/scan.cpp @@ -25,11 +25,11 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; } diff --git a/sycl/test/sub_group/scan.hpp b/sycl/test/sub_group/scan.hpp index ae9b4ced66ab0..c9d630dea2ea3 100644 --- a/sycl/test/sub_group/scan.hpp +++ b/sycl/test/sub_group/scan.hpp @@ -10,11 +10,11 @@ #include #include -template class sycl_subgr; +template class sycl_subgr; using namespace cl::sycl; -template +template void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, size_t G = 256, size_t L = 64) { try { @@ -25,7 +25,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, auto sgsizeacc = sgsizebuf.get_access(cgh); auto exacc = exbuf.template get_access(cgh); auto inacc = inbuf.template get_access(cgh); - cgh.parallel_for>( + cgh.parallel_for( NdRange, [=](nd_item<1> NdItem) { ONEAPI::sub_group sg = NdItem.get_sub_group(); if (skip_init) { @@ -72,58 +72,83 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, } } -template void check(queue &Queue, size_t G = 256, size_t L = 64) { +template +void check(queue &Queue, size_t G = 256, size_t L = 64) { // limit data range for half to avoid rounding issues if 
(std::is_same::value) { G = 64; L = 32; } - check_op(Queue, T(L), ONEAPI::plus(), false, G, L); - check_op(Queue, T(0), ONEAPI::plus(), true, G, L); + check_op, + T>(Queue, T(L), ONEAPI::plus(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::plus(), true, G, L); - check_op(Queue, T(0), ONEAPI::minimum(), false, G, L); + check_op< + sycl_subgr, + T>(Queue, T(0), ONEAPI::minimum(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, std::numeric_limits::infinity(), ONEAPI::minimum(), - true, G, L); + check_op< + sycl_subgr, + T>(Queue, std::numeric_limits::infinity(), ONEAPI::minimum(), + true, G, L); } else { - check_op(Queue, std::numeric_limits::max(), ONEAPI::minimum(), + check_op, + T>(Queue, std::numeric_limits::max(), ONEAPI::minimum(), true, G, L); } - check_op(Queue, T(G), ONEAPI::maximum(), false, G, L); + check_op< + sycl_subgr, + T>(Queue, T(G), ONEAPI::maximum(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, -std::numeric_limits::infinity(), - ONEAPI::maximum(), true, G, L); + check_op< + sycl_subgr, + T>(Queue, -std::numeric_limits::infinity(), ONEAPI::maximum(), + true, G, L); } else { - check_op(Queue, std::numeric_limits::min(), ONEAPI::maximum(), - true, G, L); + check_op, T>( + Queue, std::numeric_limits::min(), ONEAPI::maximum(), true, G, L); } #if __cplusplus >= 201402L - check_op(Queue, T(L), ONEAPI::plus<>(), false, G, L); - check_op(Queue, T(0), ONEAPI::plus<>(), true, G, L); + check_op, T>( + Queue, T(L), ONEAPI::plus<>(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::plus<>(), true, G, L); - check_op(Queue, T(0), ONEAPI::minimum<>(), false, G, L); + check_op< + sycl_subgr, + T>(Queue, T(0), ONEAPI::minimum<>(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, std::numeric_limits::infinity(), ONEAPI::minimum<>(), - true, G, L); + check_op< + sycl_subgr, + T>(Queue, std::numeric_limits::infinity(), 
ONEAPI::minimum<>(), true, + G, L); } else { - check_op(Queue, std::numeric_limits::max(), ONEAPI::minimum<>(), true, + check_op, + T>(Queue, std::numeric_limits::max(), ONEAPI::minimum<>(), true, G, L); } - check_op(Queue, T(G), ONEAPI::maximum<>(), false, G, L); + check_op, T>( + Queue, T(G), ONEAPI::maximum<>(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, -std::numeric_limits::infinity(), ONEAPI::maximum<>(), - true, G, L); + check_op, T>( + Queue, -std::numeric_limits::infinity(), ONEAPI::maximum<>(), true, + G, L); } else { - check_op(Queue, std::numeric_limits::min(), ONEAPI::maximum<>(), true, - G, L); + check_op< + sycl_subgr, + T>(Queue, std::numeric_limits::min(), ONEAPI::maximum<>(), true, G, + L); } #endif } diff --git a/sycl/test/sub_group/scan_fp16.cpp b/sycl/test/sub_group/scan_fp16.cpp index bfdb09f8f7f31..ba7d3c211aa21 100644 --- a/sycl/test/sub_group/scan_fp16.cpp +++ b/sycl/test/sub_group/scan_fp16.cpp @@ -20,7 +20,7 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; } diff --git a/sycl/test/sub_group/scan_fp64.cpp b/sycl/test/sub_group/scan_fp64.cpp index f2ecd6024e7a4..de7bbd9707464 100644 --- a/sycl/test/sub_group/scan_fp64.cpp +++ b/sycl/test/sub_group/scan_fp64.cpp @@ -25,7 +25,7 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; } From dc5e614b6d67bb80c027a46d0fa3f77b22e0d968 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 14 Sep 2020 21:28:42 +0300 Subject: [PATCH 403/465] [SYCL] Refactor -fsycl-esimd and -fsycl-int-header options (#2466) -fsycl-int-header option initialization code moved closer to code section processing other SYCL options. 
-fsycl-esimd enables optimizations in SYCL mode, which is not necessary after https://github.com/intel/llvm/commit/5976ff0d2776e0c438275fb4f111470032ccc53e --- clang/lib/Frontend/CompilerInvocation.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 392ba0f30f105..1f732bcd0fc3c 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -823,14 +823,10 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Args.getLastArg(OPT_emit_llvm_uselists, OPT_no_emit_llvm_uselists)) Opts.EmitLLVMUseLists = A->getOption().getID() == OPT_emit_llvm_uselists; - // ESIMD GPU Back-end requires optimized IR - bool IsSyclESIMD = Args.hasFlag(options::OPT_fsycl_esimd, - options::OPT_fno_sycl_esimd, false); - Opts.DisableLLVMPasses = Args.hasArg(OPT_disable_llvm_passes) || (Args.hasArg(OPT_fsycl_is_device) && Triple.isSPIR() && - Args.hasArg(OPT_fno_sycl_early_optimizations) && !IsSyclESIMD); + Args.hasArg(OPT_fno_sycl_early_optimizations)); Opts.DisableLifetimeMarkers = Args.hasArg(OPT_disable_lifetimemarkers); const llvm::Triple::ArchType DebugEntryValueArchs[] = { @@ -2612,6 +2608,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.SYCLValueFitInMaxInt = Args.hasFlag(options::OPT_fsycl_id_queries_fit_in_int, options::OPT_fno_sycl_id_queries_fit_in_int, false); + Opts.SYCLIntHeader = + std::string(Args.getLastArgValue(OPT_fsycl_int_header)); } Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header); @@ -2672,8 +2670,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Diags.Report(diag::warn_ignored_hip_only_option) << Args.getLastArg(OPT_gpu_max_threads_per_block_EQ)->getAsString(Args); - Opts.SYCLIntHeader = std::string(Args.getLastArgValue(OPT_fsycl_int_header)); - if (Opts.ObjC) { if (Arg *arg = 
Args.getLastArg(OPT_fobjc_runtime_EQ)) { StringRef value = arg->getValue(); From fe714e0966838285c42ce5e27c9bba9448a64ce9 Mon Sep 17 00:00:00 2001 From: sergei <57672082+s-kanaev@users.noreply.github.com> Date: Tue, 15 Sep 2020 01:23:02 +0300 Subject: [PATCH 404/465] [SYCL] Fixes in release notes (#2470) * [SYCL] Fixes in release notes * [SYCL] Fix link Signed-off-by: Sergey Kanaev --- sycl/CMakeLists.txt | 4 ++-- sycl/ReleaseNotes.md | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index f1246e4aaecb9..65bdca121eb8f 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -12,8 +12,8 @@ option(SYCL_ADD_DEV_VERSION_POSTFIX "Adds -V postfix to version string" ON) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") include(AddSYCLExecutable) -set(SYCL_MAJOR_VERSION 3) -set(SYCL_MINOR_VERSION 1) +set(SYCL_MAJOR_VERSION 4) +set(SYCL_MINOR_VERSION 0) set(SYCL_PATCH_VERSION 0) set(SYCL_DEV_ABI_VERSION 0) if (SYCL_ADD_DEV_VERSION_POSTFIX) diff --git a/sycl/ReleaseNotes.md b/sycl/ReleaseNotes.md index 3428d543b4fd9..78c8a6a3cd32a 100644 --- a/sycl/ReleaseNotes.md +++ b/sycl/ReleaseNotes.md @@ -112,6 +112,8 @@ Release notes for commit range 5976ff0..1fc0e4f - Enabled USM indirect access for interoperability kernels [ebf5c4e] ## API/ABI breakages + - Added missing constructors and propety methods for context, program and + sampler[30b8acc] ## Known issues - The format of the object files produced by the compiler can change between @@ -140,7 +142,7 @@ Release notes for commit range 5976ff0..1fc0e4f Release notes for the commit range 75b3dc2..5976ff0 ## New features - - Implemented basic support for the [Explicit SIMD extension](./sycl/doc/extensions/ExplicitSIMD/dpcpp-explicit-simd.md) + - Implemented basic support for the [Explicit SIMD extension](doc/extensions/ExplicitSIMD/dpcpp-explicit-simd.md) for low-level GPU performance tuning [84bf234] [32bf607] [a lot of others] - 
Implemented support for the [SYCL_INTEL_usm_address_spaces extension](https://github.com/intel/llvm/pull/1840) - Implemented support for the [Use Pinned Host Memory Property extension](doc/extensions/UsePinnedMemoryProperty/UsePinnedMemoryPropery.adoc) [e5ea144][aee2d6c][396759d] From 9dd18ca889ade1806a7693dd470440365c060736 Mon Sep 17 00:00:00 2001 From: jinge90 <43599496+jinge90@users.noreply.github.com> Date: Tue, 15 Sep 2020 16:07:54 +0800 Subject: [PATCH 405/465] [SYCL] Link SYCL device libraries by default. (#2400) As not all backends have supported spv file online link, we decided to disable fallback spv libraries online link at this time. Instead, all wrapper and fallback .o files will be linked offline. When all backends support spv online link, we will switch to online link for jit compilation transparently. Signed-off-by: gejin --- clang/include/clang/Driver/Options.td | 9 + clang/lib/Driver/Driver.cpp | 124 ++++++++++++- clang/test/Driver/sycl-device-lib-win.cpp | 93 ++++++++++ clang/test/Driver/sycl-device-lib.cpp | 93 ++++++++++ clang/test/Driver/sycl-offload-intelfpga.cpp | 30 ++-- .../test/Driver/sycl-offload-static-lib-2.cpp | 4 +- clang/test/Driver/sycl-offload-static-lib.cpp | 6 +- clang/test/Driver/sycl-offload-win.c | 4 +- clang/test/Driver/sycl-offload-with-split.c | 38 ++-- clang/test/Driver/sycl-offload.c | 68 ++++---- .../program_manager/program_manager.cpp | 19 +- sycl/test/devicelib/assert-aot.cpp | 2 +- sycl/test/devicelib/assert-windows.cpp | 3 +- sycl/test/devicelib/assert.cpp | 11 +- sycl/test/devicelib/cmath-aot.cpp | 8 +- sycl/test/devicelib/cmath_fp64_test.cpp | 5 +- sycl/test/devicelib/cmath_test.cpp | 82 ++++++--- sycl/test/devicelib/complex-fpga.cpp | 15 ++ sycl/test/devicelib/math_fp64_test.cpp | 5 +- .../test/devicelib/math_fp64_windows_test.cpp | 132 -------------- sycl/test/devicelib/math_override_test.cpp | 6 +- sycl/test/devicelib/math_test.cpp | 33 ++-- sycl/test/devicelib/math_utils.hpp | 5 + 
sycl/test/devicelib/math_windows_test.cpp | 121 ------------- .../devicelib/std_complex_math_fp64_test.cpp | 11 +- sycl/test/devicelib/std_complex_math_test.cpp | 164 +++++++++--------- sycl/test/spec_const/spec_const_redefine.cpp | 8 +- 27 files changed, 583 insertions(+), 516 deletions(-) create mode 100644 clang/test/Driver/sycl-device-lib-win.cpp create mode 100644 clang/test/Driver/sycl-device-lib.cpp create mode 100644 sycl/test/devicelib/complex-fpga.cpp delete mode 100644 sycl/test/devicelib/math_fp64_windows_test.cpp delete mode 100644 sycl/test/devicelib/math_windows_test.cpp diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index cebbb27609297..5ad9145fc59f2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3561,6 +3561,15 @@ def fsycl_dead_args_optimization : Flag<["-"], "fsycl-dead-args-optimization">, def fno_sycl_dead_args_optimization : Flag<["-"], "fno-sycl-dead-args-optimization">, Group, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Disables " "elimination of DPC++ dead kernel arguments">; +def fsycl_device_lib_EQ : CommaJoined<["-"], "fsycl-device-lib=">, Group, Flags<[DriverOption, CoreOption]>, + Values<"libc, libm-fp32, libm-fp64, all">, HelpText<"Control inclusion of " + "device libraries into device binary linkage. Valid arguments " + "are libc, libm-fp32, libm-fp64, all">; +def fno_sycl_device_lib_EQ : CommaJoined<["-"], "fno-sycl-device-lib=">, Group, Flags<[DriverOption, CoreOption]>, + Values<"libc, libm-fp32, libm-fp64, all">, HelpText<"Control exclusion of " + "device libraries from device binary linkage. 
Valid arguments " + "are libc, libm-fp32, libm-fp64, all">; + //===----------------------------------------------------------------------===// // CC1 Options //===----------------------------------------------------------------------===// diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 0c7f1cb5b4a22..9946362e8ac17 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - #include "clang/Driver/Driver.h" #include "InputInfo.h" #include "ToolChains/AIX.h" @@ -2720,6 +2719,16 @@ static SmallVector getLinkerArgs(Compilation &C, return LibArgs; } +static bool IsSYCLDeviceLibObj(std::string ObjFilePath, bool isMSVCEnv) { + StringRef ObjFileName = llvm::sys::path::filename(ObjFilePath); + StringRef ObjSuffix = isMSVCEnv ? ".obj" : ".o"; + bool Ret = + (ObjFileName.startswith("libsycl-") && ObjFileName.endswith(ObjSuffix)) + ? true + : false; + return Ret; +} + // Goes through all of the arguments, including inputs expected for the // linker directly, to determine if we need to perform additional work for // static offload libraries. @@ -3798,7 +3807,13 @@ class OffloadingActionBuilder final { if (IA->getType() == types::TY_Object) { if (!isObjectFile(FileName)) return ABRT_Inactive; - if (Args.hasArg(options::OPT_fintelfpga)) + // For SYCL device libraries, don't need to add them to + // FPGAObjectInputs as there is no FPGA dep files inside. 
+ + if (Args.hasArg(options::OPT_fintelfpga) && + !IsSYCLDeviceLibObj(FileName, C.getDefaultToolChain() + .getTriple() + .isWindowsMSVCEnvironment())) FPGAObjectInputs.push_back(IA); } // When creating FPGA device fat objects, all host objects are @@ -3862,6 +3877,92 @@ class OffloadingActionBuilder final { SYCLDeviceActions.clear(); } + void addSYCLDeviceLibs(const ToolChain *TC, ActionList &DeviceLinkObjects, + bool isSpirvAOT, bool isMSVCEnv) { + enum SYCLDeviceLibType { + sycl_devicelib_wrapper, + sycl_devicelib_fallback + }; + struct DeviceLibOptInfo { + StringRef devicelib_name; + StringRef devicelib_option; + }; + + bool NoDeviceLibs = false; + // Currently, libc, libm-fp32 will be linked in by default. In order + // to use libm-fp64, -fsycl-device-lib=libm-fp64/all should be used. + llvm::StringMap devicelib_link_info = { + {"libc", true}, {"libm-fp32", true}, {"libm-fp64", false}}; + if (Arg *A = Args.getLastArg(options::OPT_fsycl_device_lib_EQ, + options::OPT_fno_sycl_device_lib_EQ)) { + if (A->getValues().size() == 0) + C.getDriver().Diag(diag::warn_drv_empty_joined_argument) + << A->getAsString(Args); + else { + if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ)) + NoDeviceLibs = true; + + for (StringRef Val : A->getValues()) { + if (Val == "all") { + for (auto &K : devicelib_link_info.keys()) + devicelib_link_info[K] = true && !NoDeviceLibs; + break; + } + auto LinkInfoIter = devicelib_link_info.find(Val); + if (LinkInfoIter == devicelib_link_info.end()) { + C.getDriver().Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << Val; + } + devicelib_link_info[Val] = true && !NoDeviceLibs; + } + } + } + + SmallString<128> LibLoc(TC->getDriver().Dir); + llvm::sys::path::append(LibLoc, "/../lib"); + StringRef LibSuffix = isMSVCEnv ? 
".obj" : ".o"; + SmallVector sycl_device_wrapper_libs = { + {"libsycl-crt", "libc"}, + {"libsycl-complex", "libm-fp32"}, + {"libsycl-complex-fp64", "libm-fp64"}, + {"libsycl-cmath", "libm-fp32"}, + {"libsycl-cmath-fp64", "libm-fp64"}}; + // For AOT compilation, we need to link sycl_device_fallback_libs as + // default too. + SmallVector sycl_device_fallback_libs = { + {"libsycl-fallback-cassert", "libc"}, + {"libsycl-fallback-complex", "libm-fp32"}, + {"libsycl-fallback-complex-fp64", "libm-fp64"}, + {"libsycl-fallback-cmath", "libm-fp32"}, + {"libsycl-fallback-cmath-fp64", "libm-fp64"}}; + auto addInputs = [&](SYCLDeviceLibType t) { + auto sycl_libs = (t == sycl_devicelib_wrapper) + ? sycl_device_wrapper_libs + : sycl_device_fallback_libs; + for (const DeviceLibOptInfo &Lib : sycl_libs) { + if (!devicelib_link_info[Lib.devicelib_option]) + continue; + SmallString<128> LibName(LibLoc); + llvm::sys::path::append(LibName, Lib.devicelib_name); + llvm::sys::path::replace_extension(LibName, LibSuffix); + if (llvm::sys::fs::exists(LibName)) { + Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(), + Args.MakeArgString(LibName)); + auto *SYCLDeviceLibsInputAction = + C.MakeAction(*InputArg, types::TY_Object); + auto *SYCLDeviceLibsUnbundleAction = + C.MakeAction( + SYCLDeviceLibsInputAction); + addDeviceDepences(SYCLDeviceLibsUnbundleAction); + DeviceLinkObjects.push_back(SYCLDeviceLibsUnbundleAction); + } + } + }; + addInputs(sycl_devicelib_wrapper); + if (isSpirvAOT) + addInputs(sycl_devicelib_fallback); + } + void appendLinkDependences(OffloadAction::DeviceDependences &DA) override { assert(ToolChains.size() == DeviceLinkerInputs.size() && "Toolchains and linker inputs sizes do not match."); @@ -3941,6 +4042,11 @@ class OffloadingActionBuilder final { } ActionList DeviceLibObjects; ActionList LinkObjects; + auto TT = SYCLTripleList[I]; + auto isNVPTX = (*TC)->getTriple().isNVPTX(); + bool isSpirvAOT = TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga || + 
TT.getSubArch() == llvm::Triple::SPIRSubArch_gen || + TT.getSubArch() == llvm::Triple::SPIRSubArch_x86_64; for (const auto &Input : LI) { // FPGA aoco does not go through the link, everything else does. if (Input->getType() == types::TY_FPGA_AOCO) @@ -3948,6 +4054,15 @@ class OffloadingActionBuilder final { else LinkObjects.push_back(Input); } + // FIXME: Link all wrapper and fallback device libraries as default, + // When spv online link is supported by all backends, the fallback + // device libraries are only needed when current toolchain is using + // AOT compilation. + if (!isNVPTX) { + addSYCLDeviceLibs( + *TC, LinkObjects, true, + C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment()); + } // The linkage actions subgraph leading to the offload wrapper. // [cond] Means incoming/outgoing dependence is created only when cond // is true. A function of: @@ -4002,7 +4117,6 @@ class OffloadingActionBuilder final { Action *DeviceLinkAction = C.MakeAction(LinkObjects, types::TY_LLVM_BC); // setup some flags upfront - auto isNVPTX = (*TC)->getTriple().isNVPTX(); if (isNVPTX && DeviceCodeSplit) { // TODO Temporary limitation, need to support code splitting for PTX @@ -4014,10 +4128,6 @@ class OffloadingActionBuilder final { D.Diag(diag::err_drv_unsupported_opt_for_target) << OptName << (*TC)->getTriple().str(); } - auto TT = SYCLTripleList[I]; - bool isSpirvAOT = TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga || - TT.getSubArch() == llvm::Triple::SPIRSubArch_gen || - TT.getSubArch() == llvm::Triple::SPIRSubArch_x86_64; // reflects whether current target is ahead-of-time and can't support // runtime setting of specialization constants bool isAOT = isNVPTX || isSpirvAOT; diff --git a/clang/test/Driver/sycl-device-lib-win.cpp b/clang/test/Driver/sycl-device-lib-win.cpp new file mode 100644 index 0000000000000..22147a977e4c5 --- /dev/null +++ b/clang/test/Driver/sycl-device-lib-win.cpp @@ -0,0 +1,93 @@ +/// +/// Perform several driver tests for SYCL device 
libraries on Windows +/// +// REQUIRES: clang-driver, windows + +/// ########################################################################### + +/// test behavior of device library default link +// RUN: %clangxx -fsycl %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp64 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-msvc.o" "-outputs={{.*}}libsycl-msvc-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.o" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.o" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.o" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.o" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// 
SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.o" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" + +/// ########################################################################### +/// test behavior of device library link with libm-fp64 +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libm-fp64 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64 +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,libm-fp64 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64 +// RUN: %clangxx -fsycl %s -fsycl-device-lib=all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64 +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,libm-fp32,libm-fp64 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64 +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64 +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-msvc.o" "-outputs={{.*}}libsycl-msvc-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.o" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex-fp64.o" "-outputs={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.o" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// 
SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath-fp64.o" "-outputs={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.o" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.o" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex-fp64.o" "-outputs={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.o" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath-fp64.o" "-outputs={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" + +/// ########################################################################### + +/// test behavior of -fno-sycl-device-lib=libc +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libc -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.o" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" 
"-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.o" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.o" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.o" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" + +/// ########################################################################### + +/// test behavior of -fno-sycl-device-lib=libm-fp32 +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32 +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-msvc.o" "-outputs={{.*}}libsycl-msvc-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.o" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" + +/// ########################################################################### + +/// test behavior of disabling all device libraries +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libc,libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libc,all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32,all -### 2>&1 \ 
+// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp64,all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libc,all,libm-fp64,libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB: {{.*}}clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown-sycldevice" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB-NEXT: {{.*}}llvm-link{{.*}} {{.*}} "--suppress-warnings" + +/// ########################################################################### + +/// test invalid value for -f[no-]sycl-device-lib +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,dummy -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_INVALID_VALUE +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=dummy,libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_NO_DEVICE_LIB_INVALID_VALUE +// SYCL_DEVICE_LIB_INVALID_VALUE: error: unsupported argument 'dummy' to option 'fsycl-device-lib=' +// SYCL_NO_DEVICE_LIB_INVALID_VALUE: error: unsupported argument 'dummy' to option 'fno-sycl-device-lib=' diff --git a/clang/test/Driver/sycl-device-lib.cpp b/clang/test/Driver/sycl-device-lib.cpp new file mode 100644 index 0000000000000..7365d4f4948f7 --- /dev/null +++ b/clang/test/Driver/sycl-device-lib.cpp @@ -0,0 +1,93 @@ +/// +/// Perform several driver tests for SYCL device libraries on Linux +/// +// REQUIRES: clang-driver, linux + +/// ########################################################################### + +/// test behavior of device library default link +// RUN: %clangxx -fsycl %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// RUN: %clangxx -fsycl %s 
-fsycl-device-lib=libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp64 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-glibc.o" "-outputs={{.*}}libsycl-glibc-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.o" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.o" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.o" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.o" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.o" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" + +/// ########################################################################### +/// test behavior of device library link with libm-fp64 +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libm-fp64 -### 2>&1 \ +// RUN: | FileCheck %s 
-check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64 +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,libm-fp64 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64 +// RUN: %clangxx -fsycl %s -fsycl-device-lib=all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64 +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,libm-fp32,libm-fp64 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64 +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64 +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-glibc.o" "-outputs={{.*}}libsycl-glibc-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.o" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex-fp64.o" "-outputs={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.o" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath-fp64.o" "-outputs={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.o" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// 
SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.o" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex-fp64.o" "-outputs={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.o" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath-fp64.o" "-outputs={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" + +/// ########################################################################### + +/// test behavior of -fno-sycl-device-lib=libc +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libc -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.o" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.o" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.o" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" 
"-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.o" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" + +/// ########################################################################### + +/// test behavior of -fno-sycl-device-lib=libm-fp32 +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32 +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-glibc.o" "-outputs={{.*}}libsycl-glibc-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.o" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" + +/// ########################################################################### + +/// test behavior of disabling all device libraries +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libc,libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libc,all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32,all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp64,all -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libc,all,libm-fp64,libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB +// SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB: 
{{.*}}clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown-sycldevice" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_DEVICE_LIB-NEXT: {{.*}}llvm-link{{.*}} {{.*}} "--suppress-warnings" + +/// ########################################################################### + +/// test invalid value for -f[no-]sycl-device-lib +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,dummy -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_INVALID_VALUE +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=dummy,libm-fp32 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_NO_DEVICE_LIB_INVALID_VALUE +// SYCL_DEVICE_LIB_INVALID_VALUE: error: unsupported argument 'dummy' to option 'fsycl-device-lib=' +// SYCL_NO_DEVICE_LIB_INVALID_VALUE: error: unsupported argument 'dummy' to option 'fno-sycl-device-lib=' diff --git a/clang/test/Driver/sycl-offload-intelfpga.cpp b/clang/test/Driver/sycl-offload-intelfpga.cpp index 7c76b19db1eb7..c42e60aacf07a 100644 --- a/clang/test/Driver/sycl-offload-intelfpga.cpp +++ b/clang/test/Driver/sycl-offload-intelfpga.cpp @@ -21,11 +21,11 @@ /// -fintelfpga -fsycl-link tests // RUN: touch %t.o -// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -fsycl-link %t.o -o libfoo.a 2>&1 \ +// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -fsycl-link %t.o -o libfoo.a 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK,CHK-FPGA-EARLY %s -// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -fsycl-link=early %t.o -o libfoo.a 2>&1 \ +// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -fsycl-link=early %t.o -o libfoo.a 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK,CHK-FPGA-EARLY %s -// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -fsycl-link=image %t.o -o libfoo.a 2>&1 \ +// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -fsycl-link=image 
%t.o -o libfoo.a 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK,CHK-FPGA-IMAGE %s // CHK-FPGA-LINK-NOT: clang-offload-bundler{{.*}} "-check-section" // CHK-FPGA-LINK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_fpga-unknown-unknown-sycldevice" "-inputs=[[INPUT:.+\.o]]" "-outputs=[[OUTPUT1:.+\.o]]" "-unbundle" @@ -50,9 +50,9 @@ /// -fintelfpga -fsycl-link clang-cl specific // RUN: touch %t.obj -// RUN: %clang_cl -### -fsycl -fintelfpga -fsycl-link %t.obj -Folibfoo.lib 2>&1 \ +// RUN: %clang_cl -### -fsycl -fintelfpga -fno-sycl-device-lib=all -fsycl-link %t.obj -Folibfoo.lib 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK-WIN %s -// RUN: %clang_cl -### -fsycl -fintelfpga -fsycl-link %t.obj -o libfoo.lib 2>&1 \ +// RUN: %clang_cl -### -fsycl -fintelfpga -fno-sycl-device-lib=all -fsycl-link %t.obj -o libfoo.lib 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK-WIN %s // CHK-FPGA-LINK-WIN: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_fpga-unknown-unknown-sycldevice{{.*}}" "-inputs=[[INPUT:.+\.obj]]" "-outputs=[[OUTPUT1:.+\.obj]]" "-unbundle" // CHK-FPGA-LINK-WIN-NOT: clang-offload-bundler{{.*}} @@ -185,9 +185,9 @@ /// -fintelfpga -fsycl-link from source // RUN: touch %t.cpp -// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -fsycl-link=early %t.cpp -ccc-print-phases 2>&1 \ +// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -fsycl-link=early %t.cpp -ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK-SRC,CHK-FPGA-LINK-SRC-DEFAULT %s -// RUN: %clang_cl -### -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -fsycl-link=early %t.cpp -ccc-print-phases 2>&1 \ +// RUN: %clang_cl -### -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -fsycl-link=early %t.cpp -ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK-SRC,CHK-FPGA-LINK-SRC-CL %s // CHK-FPGA-LINK-SRC: 0: input, 
"[[INPUT:.+\.cpp]]", c++, (host-sycl) // CHK-FPGA-LINK-SRC: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) @@ -275,9 +275,9 @@ /// -fintelfpga dependency file use from object phases test // RUN: touch %t-1.o -// RUN: %clangxx -fsycl -fintelfpga -ccc-print-phases -### %t-1.o 2>&1 \ +// RUN: %clangxx -fsycl -fno-sycl-device-lib=all -fintelfpga -ccc-print-phases -### %t-1.o 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-FPGA-DEP-FILES-OBJ-PHASES -DINPUT=%t-1.o %s -// RUN: %clang_cl -fsycl -fintelfpga -ccc-print-phases -### %t-1.o 2>&1 \ +// RUN: %clang_cl -fsycl -fno-sycl-device-lib=all -fintelfpga -ccc-print-phases -### %t-1.o 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-FPGA-DEP-FILES-OBJ-PHASES -DINPUT=%t-1.o %s // CHK-FPGA-DEP-FILES-OBJ-PHASES: 0: input, "[[INPUT]]", object, (host-sycl) // CHK-FPGA-DEP-FILES-OBJ-PHASES: 1: clang-offload-unbundler, {0}, object, (host-sycl) @@ -348,7 +348,7 @@ // RUN: llc -filetype=obj -o %t-aoco_cl.o %t-aoco_cl.bc // RUN: llvm-ar crv %t_aoco.a %t.o %t2.o %t-aoco.o // RUN: llvm-ar crv %t_aoco_cl.a %t.o %t2_cl.o %t-aoco_cl.o -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -foffload-static-lib=%t_aoco.a %s -### -ccc-print-phases 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -foffload-static-lib=%t_aoco.a %s -### -ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-FPGA-AOCO-PHASES %s // CHK-FPGA-AOCO-PHASES: 0: input, "[[INPUTA:.+\.a]]", object, (host-sycl) // CHK-FPGA-AOCO-PHASES: 1: input, "[[INPUTCPP:.+\.cpp]]", c++, (host-sycl) @@ -377,7 +377,7 @@ // CHK-FPGA-AOCO-PHASES: 24: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {23}, image /// FPGA AOCO Windows phases check -// RUN: %clang_cl -fsycl -fintelfpga -foffload-static-lib=%t_aoco_cl.a %s -### -ccc-print-phases 2>&1 \ +// RUN: %clang_cl -fsycl -fno-sycl-device-lib=all -fintelfpga -foffload-static-lib=%t_aoco_cl.a %s -### 
-ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-AOCO-PHASES-WIN %s // CHK-FPGA-AOCO-PHASES-WIN: 0: input, "{{.*}}", object, (host-sycl) // CHK-FPGA-AOCO-PHASES-WIN: 1: input, "[[INPUTSRC:.+\.cpp]]", c++, (host-sycl) @@ -405,13 +405,13 @@ // CHK-FPGA-AOCO-PHASES-WIN: 23: offload, "host-sycl (x86_64-pc-windows-msvc)" {10}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {22}, image /// aoco test, checking tools -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -foffload-static-lib=%t_aoco.a -### %s 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -foffload-static-lib=%t_aoco.a -### %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-AOCO,CHK-FPGA-AOCO-LIN %s -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fintelfpga %t_aoco.a -### %s 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga %t_aoco.a -### %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-AOCO,CHK-FPGA-AOCO-LIN %s -// RUN: %clang_cl -fsycl -fintelfpga -foffload-static-lib=%t_aoco_cl.a -### %s 2>&1 \ +// RUN: %clang_cl -fsycl -fno-sycl-device-lib=all -fintelfpga -foffload-static-lib=%t_aoco_cl.a -### %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-AOCO,CHK-FPGA-AOCO-WIN %s -// RUN: %clang_cl -fsycl -fintelfpga %t_aoco_cl.a -### %s 2>&1 \ +// RUN: %clang_cl -fsycl -fno-sycl-device-lib=all -fintelfpga %t_aoco_cl.a -### %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-AOCO,CHK-FPGA-AOCO-WIN %s // CHK-FPGA-AOCO-LIN: clang-offload-bundler{{.*}} "-type=ao" "-targets=sycl-fpga_aoco-intel-unknown-sycldevice" "-inputs=[[INPUTLIB:.+\.a]]" "-check-section" // CHK-FPGA-AOCO-LIN: clang{{.*}} "-emit-obj" {{.*}} "-o" "[[HOSTOBJ:.+\.o]]" diff --git a/clang/test/Driver/sycl-offload-static-lib-2.cpp b/clang/test/Driver/sycl-offload-static-lib-2.cpp index eff45a9d19f30..cc97a6a53a8de 100644 --- a/clang/test/Driver/sycl-offload-static-lib-2.cpp 
+++ b/clang/test/Driver/sycl-offload-static-lib-2.cpp @@ -99,9 +99,9 @@ /// ########################################################################### /// test behaviors of static lib with no source/object -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -L/dummy/dir %t.a -### 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -L/dummy/dir %t.a -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=STATIC_LIB_NOSRC -DINPUTLIB=%t.a -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -L/dummy/dir %t.lo -### 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -L/dummy/dir %t.lo -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=STATIC_LIB_NOSRC -DINPUTLIB=%t.lo // STATIC_LIB_NOSRC: clang-offload-bundler{{.*}} "-type=ao" "-targets=host-x86_64-unknown-linux-gnu" "-inputs=[[INPUTLIB]]" "-check-section" // STATIC_LIB_NOSRC: ld{{.*}} "-r" "-o" "[[PARTIALOBJ:.+\.o]]" "{{.*}}crt1.o" {{.*}} "-L/dummy/dir" {{.*}} "[[INPUTLIB]]" diff --git a/clang/test/Driver/sycl-offload-static-lib.cpp b/clang/test/Driver/sycl-offload-static-lib.cpp index e32b244162e38..95cb56245d539 100644 --- a/clang/test/Driver/sycl-offload-static-lib.cpp +++ b/clang/test/Driver/sycl-offload-static-lib.cpp @@ -47,7 +47,7 @@ /// test behaviors of -foffload-static-lib= from source // RUN: touch %t.a -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -foffload-static-lib=%t.a -ccc-print-phases %s 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -foffload-static-lib=%t.a -ccc-print-phases %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=FOFFLOAD_STATIC_LIB_SRC // FOFFLOAD_STATIC_LIB_SRC: 0: input, "[[INPUTA:.+\.a]]", object, (host-sycl) @@ -122,9 +122,9 @@ /// ########################################################################### /// test behaviors of -foffload-static-lib with no source/object -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -L/dummy/dir 
-foffload-static-lib=%t.a -### -ccc-print-phases 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -L/dummy/dir -foffload-static-lib=%t.a -### -ccc-print-phases 2>&1 \ // RUN: | FileCheck %s -check-prefixes=FOFFLOAD_STATIC_LIB_NOSRC_PHASES,FOFFLOAD_STATIC_LIB_NOSRC_PHASES_1 -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -L/dummy/dir -foffload-whole-static-lib=%t.a -### -ccc-print-phases 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -L/dummy/dir -foffload-whole-static-lib=%t.a -### -ccc-print-phases 2>&1 \ // RUN: | FileCheck %s -check-prefixes=FOFFLOAD_STATIC_LIB_NOSRC_PHASES,FOFFLOAD_STATIC_LIB_NOSRC_PHASES_2 // FOFFLOAD_STATIC_LIB_NOSRC_PHASES: 0: input, "[[INPUTA:.+\.a]]", object, (host-sycl) // FOFFLOAD_STATIC_LIB_NOSRC_PHASES: 1: linker, {0}, image, (host-sycl) diff --git a/clang/test/Driver/sycl-offload-win.c b/clang/test/Driver/sycl-offload-win.c index 87db2f5fdc15a..1c41c844333a4 100644 --- a/clang/test/Driver/sycl-offload-win.c +++ b/clang/test/Driver/sycl-offload-win.c @@ -57,9 +57,9 @@ /// Test behaviors of -foffload-static-lib= from source. 
// RUN: touch %t.lib -// RUN: %clang --target=x86_64-pc-windows-msvc -fsycl -foffload-static-lib=%t.lib -ccc-print-phases %s 2>&1 \ +// RUN: %clang --target=x86_64-pc-windows-msvc -fsycl -fno-sycl-device-lib=all -foffload-static-lib=%t.lib -ccc-print-phases %s 2>&1 \ // RUN: | FileCheck -DLIB=%t.lib %s -check-prefix=FOFFLOAD_STATIC_LIB_SRC -// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -foffload-static-lib=%t.lib -ccc-print-phases %s 2>&1 \ +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -fno-sycl-device-lib=all -foffload-static-lib=%t.lib -ccc-print-phases %s 2>&1 \ // RUN: | FileCheck -DLIB=%t.lib %s -check-prefix=FOFFLOAD_STATIC_LIB_SRC // FOFFLOAD_STATIC_LIB_SRC: 0: input, "[[INPUTLIB:.+\.lib]]", object, (host-sycl) diff --git a/clang/test/Driver/sycl-offload-with-split.c b/clang/test/Driver/sycl-offload-with-split.c index 4d01f4d5fb800..b8eb462f6448c 100644 --- a/clang/test/Driver/sycl-offload-with-split.c +++ b/clang/test/Driver/sycl-offload-with-split.c @@ -12,17 +12,17 @@ /// preprocessor and another one joining the device linking outputs to the host /// action. 
The same graph should be generated when no -fsycl-targets is used /// The same phase graph will be used with -fsycl-use-bitcode -// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-DEFAULT-MODE %s -// RUN: %clang_cl -ccc-print-phases -fsycl -fsycl-device-code-split=per_source -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split=per_source -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-CL-MODE %s -// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split=per_source -fno-sycl-use-bitcode %s 2>&1 \ +// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split=per_source -fno-sycl-use-bitcode %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-DEFAULT-MODE %s -// RUN: %clang_cl -ccc-print-phases -fsycl -fsycl-device-code-split=per_source -fno-sycl-use-bitcode %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split=per_source -fno-sycl-use-bitcode %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-CL-MODE %s -// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split=per_source -fsycl-use-bitcode %s 2>&1 \ +// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split=per_source -fsycl-use-bitcode %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-DEFAULT-MODE %s -// 
RUN: %clang_cl -ccc-print-phases -fsycl -fsycl-device-code-split=per_source -fsycl-use-bitcode %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split=per_source -fsycl-use-bitcode %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-CL-MODE %s // CHK-PHASES: 0: input, "[[INPUT:.+\.c]]", c, (host-sycl) // CHK-PHASES: 1: preprocessor, {0}, cpp-output, (host-sycl) @@ -49,7 +49,7 @@ /// Check the phases also add a library to make sure it is treated as input by /// the device. -// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -lsomelib -fsycl -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -lsomelib -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES-LIB %s // CHK-PHASES-LIB: 0: input, "somelib", object, (host-sycl) // CHK-PHASES-LIB: 1: input, "[[INPUT:.+\.c]]", c, (host-sycl) @@ -75,7 +75,7 @@ /// Check the phases when using and multiple source files // RUN: echo " " > %t.c -// RUN: %clang -ccc-print-phases -lsomelib -target x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown-sycldevice %s %t.c 2>&1 \ +// RUN: %clang -ccc-print-phases -lsomelib -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown-sycldevice %s %t.c 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES-FILES %s // CHK-PHASES-FILES: 0: input, "somelib", object, (host-sycl) @@ -112,11 +112,11 @@ /// Check separate compilation with offloading - unbundling actions // RUN: touch %t.o -// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split -o %t.out -lsomelib -fsycl-targets=spir64-unknown-unknown-sycldevice %t.o 2>&1 \ +// RUN: %clang -### -ccc-print-phases 
-target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -o %t.out -lsomelib -fsycl-targets=spir64-unknown-unknown-sycldevice %t.o 2>&1 \ // RUN: | FileCheck -DINPUT=%t.o -check-prefix=CHK-UBACTIONS %s // RUN: mkdir -p %t_dir // RUN: touch %t_dir/dummy -// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split -o %t.out -lsomelib -fsycl-targets=spir64-unknown-unknown-sycldevice %t_dir/dummy 2>&1 \ +// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -o %t.out -lsomelib -fsycl-targets=spir64-unknown-unknown-sycldevice %t_dir/dummy 2>&1 \ // RUN: | FileCheck -DINPUT=%t_dir/dummy -check-prefix=CHK-UBACTIONS %s // CHK-UBACTIONS: 0: input, "somelib", object, (host-sycl) // CHK-UBACTIONS: 1: input, "[[INPUT]]", object, (host-sycl) @@ -134,7 +134,7 @@ /// Check separate compilation with offloading - unbundling with source // RUN: touch %t.o -// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -lsomelib -fsycl -fsycl-device-code-split %t.o -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -lsomelib -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split %t.o -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-UBUACTIONS %s // CHK-UBUACTIONS: 0: input, "somelib", object, (host-sycl) // CHK-UBUACTIONS: 1: input, "[[INPUT1:.+\.o]]", object, (host-sycl) @@ -161,11 +161,11 @@ /// ########################################################################### /// Ahead of Time compilation for fpga, gen, cpu -// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fsycl-device-code-split -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split 
-fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-FPGA -// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fsycl-device-code-split -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-GEN -// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fsycl-device-code-split -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-CPU // CHK-PHASES-AOT: 0: input, "[[INPUT:.+\.c]]", c, (host-sycl) // CHK-PHASES-AOT: 1: preprocessor, {0}, cpp-output, (host-sycl) @@ -196,13 +196,13 @@ /// ########################################################################### /// Ahead of Time compilation for fpga, gen, cpu - tool invocation -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-FPGA -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split -fintelfpga %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fintelfpga %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-FPGA -// RUN: %clang -target 
x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-GEN -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-CPU // CHK-TOOLS-AOT: clang{{.*}} "-fsycl-is-device" {{.*}} "-o" "[[OUTPUT1:.+\.bc]]" // CHK-TOOLS-AOT: llvm-link{{.*}} "[[OUTPUT1]]" "-o" "[[OUTPUT2:.+\.bc]]" @@ -226,7 +226,7 @@ /// ########################################################################### /// offload with multiple targets, including AOT -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown-sycldevice,spir64_fpga-unknown-unknown-sycldevice,spir64_gen-unknown-unknown-sycldevice -### -ccc-print-phases %s 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown-sycldevice,spir64_fpga-unknown-unknown-sycldevice,spir64_gen-unknown-unknown-sycldevice -### -ccc-print-phases %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG %s // CHK-PHASE-MULTI-TARG: 0: input, "[[INPUT:.+\.c]]", c, (host-sycl) // CHK-PHASE-MULTI-TARG: 1: preprocessor, {0}, cpp-output, (host-sycl) diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index aeea104e524bd..3bf790d87a6ce 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -161,17 +161,17 @@ /// 
preprocessor and another one joining the device linking outputs to the host /// action. The same graph should be generated when no -fsycl-targets is used /// The same phase graph will be used with -fsycl-use-bitcode -// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-DEFAULT-MODE %s -// RUN: %clang_cl -ccc-print-phases -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-CL-MODE %s -// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-use-bitcode %s 2>&1 \ +// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-use-bitcode -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-DEFAULT-MODE %s -// RUN: %clang_cl -ccc-print-phases -fsycl -fno-sycl-use-bitcode %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases -fsycl -fno-sycl-use-bitcode -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-CL-MODE %s -// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-use-bitcode %s 2>&1 \ +// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fsycl-use-bitcode -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-DEFAULT-MODE %s -// RUN: %clang_cl -ccc-print-phases -fsycl -fsycl-use-bitcode %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases -fsycl -fsycl-use-bitcode -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-CL-MODE %s // CHK-PHASES: 
0: input, "[[INPUT:.+\.c]]", c, (host-sycl) // CHK-PHASES: 1: preprocessor, {0}, cpp-output, (host-sycl) @@ -208,7 +208,7 @@ /// Check the phases also add a library to make sure it is treated as input by /// the device. -// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -lsomelib -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -lsomelib -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES-LIB %s // CHK-PHASES-LIB: 0: input, "somelib", object, (host-sycl) // CHK-PHASES-LIB: 1: input, "[[INPUT:.+\.c]]", c, (host-sycl) @@ -241,7 +241,7 @@ /// Check the phases when using and multiple source files // RUN: echo " " > %t.c -// RUN: %clang -ccc-print-phases -lsomelib -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice %s %t.c 2>&1 \ +// RUN: %clang -ccc-print-phases -lsomelib -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -fno-sycl-device-lib=all %s %t.c 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES-FILES %s // CHK-PHASES-FILES: 0: input, "somelib", object, (host-sycl) @@ -296,11 +296,11 @@ /// Check separate compilation with offloading - unbundling actions // RUN: touch %t.o -// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -o %t.out -lsomelib -fsycl-targets=spir64-unknown-unknown-sycldevice %t.o 2>&1 \ +// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -o %t.out -lsomelib -fsycl-targets=spir64-unknown-unknown-sycldevice %t.o 2>&1 \ // RUN: | FileCheck -DINPUT=%t.o -check-prefix=CHK-UBACTIONS %s // RUN: mkdir -p %t_dir // RUN: touch %t_dir/dummy -// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -o %t.out -lsomelib -fsycl-targets=spir64-unknown-unknown-sycldevice %t_dir/dummy 2>&1 \ +// RUN: %clang 
-### -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -o %t.out -lsomelib -fsycl-targets=spir64-unknown-unknown-sycldevice %t_dir/dummy 2>&1 \ // RUN: | FileCheck -DINPUT=%t_dir/dummy -check-prefix=CHK-UBACTIONS %s // CHK-UBACTIONS: 0: input, "somelib", object, (host-sycl) // CHK-UBACTIONS: 1: input, "[[INPUT]]", object, (host-sycl) @@ -318,7 +318,7 @@ /// Check separate compilation with offloading - unbundling with source // RUN: touch %t.o -// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -lsomelib -fsycl %t.o -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -lsomelib -fsycl -fno-sycl-device-lib=all %t.o -fsycl-targets=spir64-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-UBUACTIONS %s // CHK-UBUACTIONS: 0: input, "somelib", object, (host-sycl) // CHK-UBUACTIONS: 1: input, "[[INPUT1:.+\.o]]", object, (host-sycl) @@ -508,7 +508,7 @@ /// Check regular offload with an additional AOT binary passed through -fsycl-add-targets (same triple) -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -fsycl-add-targets=spir64-unknown-unknown-sycldevice:dummy.spv -ccc-print-phases %s 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64-unknown-unknown-sycldevice -fsycl-add-targets=spir64-unknown-unknown-sycldevice:dummy.spv -ccc-print-phases %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-ADD-TARGETS-REG %s // CHK-ADD-TARGETS-REG: 0: input, "[[INPUT:.+\.c]]", c, (host-sycl) // CHK-ADD-TARGETS-REG: 1: preprocessor, {0}, cpp-output, (host-sycl) @@ -534,7 +534,7 @@ /// ########################################################################### /// Check regular offload with multiple additional AOT binaries passed through -fsycl-add-targets -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl 
-fsycl-targets=spir64-unknown-unknown-sycldevice -fsycl-add-targets=spir64_fpga-unknown-unknown-sycldevice:dummy.aocx,spir64_gen-unknown-unknown-sycldevice:dummy_Gen9core.bin,spir64_x86_64-unknown-unknown-sycldevice:dummy.ir -ccc-print-phases %s 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64-unknown-unknown-sycldevice -fsycl-add-targets=spir64_fpga-unknown-unknown-sycldevice:dummy.aocx,spir64_gen-unknown-unknown-sycldevice:dummy_Gen9core.bin,spir64_x86_64-unknown-unknown-sycldevice:dummy.ir -ccc-print-phases %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-ADD-TARGETS-REG-MUL %s // CHK-ADD-TARGETS-REG-MUL: 0: input, "[[INPUT:.+\.c]]", c, (host-sycl) // CHK-ADD-TARGETS-REG-MUL: 1: preprocessor, {0}, cpp-output, (host-sycl) @@ -609,11 +609,11 @@ /// ########################################################################### /// Ahead of Time compilation for fpga, gen, cpu -// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-FPGA -// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-GEN -// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | 
FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-CPU // CHK-PHASES-AOT: 0: input, "[[INPUT:.+\.c]]", c, (host-sycl) // CHK-PHASES-AOT: 1: preprocessor, {0}, cpp-output, (host-sycl) @@ -642,29 +642,29 @@ /// ########################################################################### /// Ahead of Time compilation for fpga, gen, cpu - tool invocation -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-FPGA,CHK-TOOLS-FPGA-USM-DISABLE -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fintelfpga %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-FPGA,CHK-TOOLS-FPGA-USM-DISABLE -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice -Xshardware %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice -Xshardware %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-FPGA,CHK-TOOLS-FPGA-USM-ENABLE -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -Xshardware %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -Xshardware %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-FPGA,CHK-TOOLS-FPGA-USM-ENABLE -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice -Xssimulation %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice 
-Xssimulation %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-FPGA,CHK-TOOLS-FPGA-USM-ENABLE -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -Xssimulation %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -Xssimulation %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-FPGA,CHK-TOOLS-FPGA-USM-ENABLE -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice -Xsemulator %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice -Xsemulator %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-FPGA,CHK-TOOLS-FPGA-USM-DISABLE -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fintelfpga -Xsemulator %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -Xsemulator %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-FPGA,CHK-TOOLS-FPGA-USM-DISABLE -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-GEN -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-CPU -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu 
-fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-GEN -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-AOT,CHK-TOOLS-CPU // CHK-TOOLS-AOT: clang{{.*}} "-fsycl-is-device" {{.*}} "-o" "[[OUTPUT1:.+\.bc]]" // CHK-TOOLS-AOT: llvm-link{{.*}} "[[OUTPUT1]]" "-o" "[[OUTPUT2:.+\.bc]]" @@ -785,7 +785,7 @@ /// ########################################################################### /// offload with multiple targets, including AOT -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice,spir64_fpga-unknown-unknown-sycldevice,spir64_gen-unknown-unknown-sycldevice -### -ccc-print-phases %s 2>&1 \ +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64-unknown-unknown-sycldevice,spir64_fpga-unknown-unknown-sycldevice,spir64_gen-unknown-unknown-sycldevice -### -ccc-print-phases %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG %s // CHK-PHASE-MULTI-TARG: 0: input, "[[INPUT:.+\.c]]", c, (host-sycl) // CHK-PHASE-MULTI-TARG: 1: preprocessor, {0}, cpp-output, (host-sycl) @@ -824,9 +824,9 @@ /// ########################################################################### /// Verify that -save-temps does not crash -// RUN: %clang -fsycl -target x86_64-unknown-linux-gnu -save-temps %s -### 2>&1 -// RUN: %clang -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -target x86_64-unknown-linux-gnu -save-temps %s -### 2>&1 -// RUN: %clangxx -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -target x86_64-unknown-linux-gnu -save-temps %s -### 2>&1 \ +// RUN: %clang -fsycl 
-fno-sycl-device-lib=all -target x86_64-unknown-linux-gnu -save-temps %s -### 2>&1 +// RUN: %clang -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64-unknown-unknown-sycldevice -target x86_64-unknown-linux-gnu -save-temps %s -### 2>&1 +// RUN: %clangxx -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64-unknown-unknown-sycldevice -target x86_64-unknown-linux-gnu -save-temps %s -### 2>&1 \ // RUN: | FileCheck %s --check-prefixes=CHK-FSYCL-SAVE-TEMPS,CHK-FSYCL-SAVE-TEMPS-CONFL // CHK-FSYCL-SAVE-TEMPS: clang{{.*}} "-fsycl-is-device"{{.*}} "-o" "[[DEVICE_BASE_NAME:[a-z0-9-]+]].ii" // CHK-FSYCL-SAVE-TEMPS: clang{{.*}} "-fsycl-is-device"{{.*}} "-o" "[[DEVICE_BASE_NAME]].bc"{{.*}} "[[DEVICE_BASE_NAME]].ii" @@ -858,9 +858,9 @@ /// passing of a library should not trigger the unbundler // RUN: touch %t.a // RUN: touch %t.lib -// RUN: %clang -ccc-print-phases -fsycl %t.a %s 2>&1 \ +// RUN: %clang -ccc-print-phases -fsycl -fno-sycl-device-lib=all %t.a %s 2>&1 \ // RUN: | FileCheck -check-prefix=LIB-UNBUNDLE-CHECK %s -// RUN: %clang_cl -ccc-print-phases -fsycl %t.lib %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases -fsycl -fno-sycl-device-lib=all %t.lib %s 2>&1 \ // RUN: | FileCheck -check-prefix=LIB-UNBUNDLE-CHECK %s // LIB-UNBUNDLE-CHECK-NOT: clang-offload-unbundler diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index c597b70669af6..89d23d65937be 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -371,6 +371,9 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means // no fallback device library will be linked. uint32_t DeviceLibReqMask = 0; + // FIXME: disable the fallback device libraries online link as not all + // backends support spv online link. Need to enable it when all backends + // support spv online link.
if (Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV && !SYCLConfig::get()) DeviceLibReqMask = getDeviceLibReqMask(Img); @@ -777,16 +780,12 @@ ProgramManager::ProgramPtr ProgramManager::build( LinkOpts = LinkOptions.c_str(); } - // The Level Zero driver support for online linking currently has bugs, but - // we think the DPC++ runtime support is ready. This environment variable - // gates the runtime support for online linking, so we can try enabling if a - // new driver is released before the next DPC++ release. - static bool EnableLevelZeroLink = std::getenv("SYCL_ENABLE_LEVEL_ZERO_LINK"); - if (!EnableLevelZeroLink) { - if (Context->getPlugin().getBackend() == backend::level_zero) { - LinkDeviceLibs = false; - } - } + // TODO: Because online linking isn't implemented yet on Level Zero, the + // compiler always links against the fallback device libraries. Once + // online linking is supported on all backends, we should remove the line + // below and also change the compiler, so it no longer links the fallback + // code unconditionally. + LinkDeviceLibs = false; // TODO: this is a temporary workaround for GPU tests for ESIMD compiler. 
// We do not link with other device libraries, because it may fail diff --git a/sycl/test/devicelib/assert-aot.cpp b/sycl/test/devicelib/assert-aot.cpp index f360836f40e5d..811f174b399d7 100644 --- a/sycl/test/devicelib/assert-aot.cpp +++ b/sycl/test/devicelib/assert-aot.cpp @@ -1,5 +1,5 @@ // REQUIRES: opencl-aot, cpu, linux -// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/assert.cpp %sycl_libs_dir/libsycl-crt.o %sycl_libs_dir/libsycl-fallback-cassert.o -o %t.aot.out +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/assert.cpp -o %t.aot.out // RUN: %CPU_RUN_PLACEHOLDER %t.aot.out >%t.aot.msg // RUN: FileCheck %S/assert.cpp --input-file %t.aot.msg --check-prefixes=CHECK-MESSAGE diff --git a/sycl/test/devicelib/assert-windows.cpp b/sycl/test/devicelib/assert-windows.cpp index 5f6de9a82cdfd..01db1858e5e67 100644 --- a/sycl/test/devicelib/assert-windows.cpp +++ b/sycl/test/devicelib/assert-windows.cpp @@ -4,8 +4,7 @@ // Disable the test until the fix reaches SYCL test infrastructure. // XFAIL: * // -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-crt.obj -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // // MSVC implementation of assert does not call an unreachable built-in, so the // program doesn't terminate when fallback is used. 
diff --git a/sycl/test/devicelib/assert.cpp b/sycl/test/devicelib/assert.cpp index 1aaf21192f3b8..1afa4169bab73 100644 --- a/sycl/test/devicelib/assert.cpp +++ b/sycl/test/devicelib/assert.cpp @@ -1,6 +1,5 @@ // REQUIRES: cpu,linux -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-crt.o -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // (see the other RUN lines below; it is a bit complicated) // // assert() call in device code guarantees nothing: on some devices it behaves @@ -76,14 +75,6 @@ // RUN: FileCheck %s --input-file %t.stdout.native --check-prefixes=CHECK-NATIVE || FileCheck %s --input-file %t.stderr.native --check-prefix CHECK-NOTSUPPORTED // RUN: FileCheck %s --input-file %t.stderr.native --check-prefixes=CHECK-MESSAGE || FileCheck %s --input-file %t.stderr.native --check-prefix CHECK-NOTSUPPORTED // -// RUN: env SYCL_PI_TRACE=2 SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert SYCL_DEVICE_TYPE=CPU %t.out >%t.stdout.pi.fallback -// RUN: env SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert SYCL_DEVICE_TYPE=CPU %t.out >%t.stdout.msg.fallback -// RUN: FileCheck %s --input-file %t.stdout.pi.fallback --check-prefixes=CHECK-FALLBACK -// RUN: FileCheck %s --input-file %t.stdout.msg.fallback --check-prefixes=CHECK-MESSAGE -// -// CHECK-NATIVE: ---> piProgramBuild -// CHECK-FALLBACK: ---> piProgramLink -// // Skip the test if the CPU RT doesn't support the extension yet: // CHECK-NOTSUPPORTED: Device has no support for cl_intel_devicelib_assert // diff --git a/sycl/test/devicelib/cmath-aot.cpp b/sycl/test/devicelib/cmath-aot.cpp index e58a87d9f51bc..4eee5f65b221b 100644 --- a/sycl/test/devicelib/cmath-aot.cpp +++ b/sycl/test/devicelib/cmath-aot.cpp @@ -1,14 +1,14 @@ // REQUIRES: opencl-aot, cpu // UNSUPPORTED: windows -// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/cmath_test.cpp %sycl_libs_dir/libsycl-cmath.o %sycl_libs_dir/libsycl-fallback-cmath.o -o %t.cmath.out +// RUN: 
%clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/cmath_test.cpp -o %t.cmath.out // RUN: %CPU_RUN_PLACEHOLDER %t.cmath.out -// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/cmath_fp64_test.cpp %sycl_libs_dir/libsycl-cmath-fp64.o %sycl_libs_dir/libsycl-fallback-cmath-fp64.o -o %t.cmath.fp64.out +// RUN: %clangxx -fsycl -fsycl-device-lib=libm-fp64 -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/cmath_fp64_test.cpp -o %t.cmath.fp64.out // RUN: %CPU_RUN_PLACEHOLDER %t.cmath.fp64.out -// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/std_complex_math_test.cpp %sycl_libs_dir/libsycl-complex.o %sycl_libs_dir/libsycl-cmath.o %sycl_libs_dir/libsycl-fallback-complex.o %sycl_libs_dir/libsycl-fallback-cmath.o -o %t.complex.out +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/std_complex_math_test.cpp -o %t.complex.out // RUN: %CPU_RUN_PLACEHOLDER %t.complex.out -// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/std_complex_math_fp64_test.cpp %sycl_libs_dir/libsycl-complex-fp64.o %sycl_libs_dir/libsycl-cmath-fp64.o %sycl_libs_dir/libsycl-fallback-complex-fp64.o %sycl_libs_dir/libsycl-fallback-cmath-fp64.o -o %t.complex.fp64.out +// RUN: %clangxx -fsycl -fsycl-device-lib=libm-fp64 -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/std_complex_math_fp64_test.cpp -o %t.complex.fp64.out // RUN: %CPU_RUN_PLACEHOLDER %t.complex.fp64.out diff --git a/sycl/test/devicelib/cmath_fp64_test.cpp b/sycl/test/devicelib/cmath_fp64_test.cpp index 30954e0eff59f..d1f42613c6c5f 100644 --- a/sycl/test/devicelib/cmath_fp64_test.cpp +++ b/sycl/test/devicelib/cmath_fp64_test.cpp @@ -1,9 +1,8 @@ -// UNSUPPORTED: windows -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath-fp64.o -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-lib=libm-fp64 %s -o %t.out // RUN: env 
SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out + #include #include #include diff --git a/sycl/test/devicelib/cmath_test.cpp b/sycl/test/devicelib/cmath_test.cpp index bb2e37345d6fc..e07ac0f55003e 100644 --- a/sycl/test/devicelib/cmath_test.cpp +++ b/sycl/test/devicelib/cmath_test.cpp @@ -1,36 +1,29 @@ -// UNSUPPORTED: windows -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath.o -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include "math_utils.hpp" #include #include #include -#include "math_utils.hpp" namespace s = cl::sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 38 +#define TEST_NUM 36 -float ref[TEST_NUM] = { -1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, -0, 2, 0, 0, 1, 0, 2, 0, 0, 0, -0, 0, 1, 0, 1, 2, 0, 1, 2, 5, -0, 0, 0, 0, 0.5, 0.5, NAN, NAN,}; +float ref[TEST_NUM] = {1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0, + 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, + 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN}; float refIptr = 1; -template -void device_cmath_test(s::queue &deviceQueue) { +template void device_cmath_test_1(s::queue &deviceQueue) { s::range<1> numOfItems{TEST_NUM}; T result[TEST_NUM] = {-1}; - // Variable exponent is an integer value to store the exponent in frexp function - int exponent = -1; - // Variable iptr stores the integral part of float point in modf function T iptr = -1; @@ -38,15 +31,13 @@ void device_cmath_test(s::queue &deviceQueue) { int quo = -1; { s::buffer buffer1(result, numOfItems); - s::buffer buffer2(&exponent, s::range<1>{1}); - s::buffer buffer3(&iptr, s::range<1>{1}); - s::buffer buffer4(&quo, s::range<1>{1}); + s::buffer buffer2(&iptr, s::range<1>{1}); + s::buffer buffer3(&quo, s::range<1>{1}); deviceQueue.submit([&](cl::sycl::handler 
&cgh) { auto res_access = buffer1.template get_access(cgh); - auto exp_access = buffer2.template get_access(cgh); - auto iptr_access = buffer3.template get_access(cgh); - auto quo_access = buffer4.template get_access(cgh); - cgh.single_task([=]() { + auto iptr_access = buffer2.template get_access(cgh); + auto quo_access = buffer3.template get_access(cgh); + cgh.single_task([=]() { int i = 0; res_access[i++] = std::cos(0.0f); res_access[i++] = std::sin(0.0f); @@ -58,8 +49,6 @@ void device_cmath_test(s::queue &deviceQueue) { res_access[i++] = std::cosh(0.0f); res_access[i++] = std::exp(0.0f); res_access[i++] = std::fmod(1.5f, 1.0f); - res_access[i++] = std::frexp(0.0f, &exp_access[0]); - res_access[i++] = std::ldexp(1.0f, 1); res_access[i++] = std::log10(1.0f); res_access[i++] = std::modf(1.0f, &iptr_access[0]); res_access[i++] = std::pow(1.0f, 1.0f); @@ -99,16 +88,53 @@ void device_cmath_test(s::queue &deviceQueue) { // Test modf integral part assert(approx_equal_fp(iptr, refIptr)); - // Test frexp exponent - assert(exponent == 0); - // Test remquo sign assert(quo == 0); } +// MSVC implements std::ldexp and std::frexp by invoking the +// 'double' version of corresponding C math functions(ldexp and frexp). Those +// 2 functions can only work on Windows with fp64 extension support from +// underlying device. 
+#ifndef _WIN32 +template void device_cmath_test_2(s::queue &deviceQueue) { + s::range<1> numOfItems{2}; + T result[2] = {-1}; + T ref[2] = {0, 2}; + // Variable exponent is an integer value to store the exponent in frexp + // function + int exponent = -1; + + { + s::buffer buffer1(result, numOfItems); + s::buffer buffer2(&exponent, s::range<1>{1}); + deviceQueue.submit([&](cl::sycl::handler &cgh) { + auto res_access = buffer1.template get_access(cgh); + auto exp_access = buffer2.template get_access(cgh); + cgh.single_task([=]() { + int i = 0; + res_access[i++] = std::frexp(0.0f, &exp_access[0]); + res_access[i++] = std::ldexp(1.0f, 1); + }); + }); + } + + // Compare result with reference + for (int i = 0; i < 2; ++i) { + assert(approx_equal_fp(result[i], ref[i])); + } + + // Test frexp exponent + assert(exponent == 0); +} +#endif + int main() { s::queue deviceQueue; - device_cmath_test(deviceQueue); + device_cmath_test_1(deviceQueue); +#ifndef _WIN32 + device_cmath_test_2(deviceQueue); +#endif std::cout << "Pass" << std::endl; return 0; } diff --git a/sycl/test/devicelib/complex-fpga.cpp b/sycl/test/devicelib/complex-fpga.cpp new file mode 100644 index 0000000000000..aa01ee0a98de0 --- /dev/null +++ b/sycl/test/devicelib/complex-fpga.cpp @@ -0,0 +1,15 @@ +//==----- accelerator.cpp - AOT compilation for fpga devices using aoc ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------------===// +// UNSUPPORTED: windows +// REQUIRES: aoc, accelerator + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %S/std_complex_math_test.cpp -o %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fintelfpga %S/std_complex_math_test.cpp -o %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/devicelib/math_fp64_test.cpp b/sycl/test/devicelib/math_fp64_test.cpp index dec0bf7341d92..7f17fe63b3d01 100644 --- a/sycl/test/devicelib/math_fp64_test.cpp +++ b/sycl/test/devicelib/math_fp64_test.cpp @@ -1,9 +1,8 @@ -// REQUIRES: cpu, linux -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath-fp64.o -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-lib=libm-fp64 %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out + #include "math_utils.hpp" #include #include diff --git a/sycl/test/devicelib/math_fp64_windows_test.cpp b/sycl/test/devicelib/math_fp64_windows_test.cpp deleted file mode 100644 index 15c3aca33ba8e..0000000000000 --- a/sycl/test/devicelib/math_fp64_windows_test.cpp +++ /dev/null @@ -1,132 +0,0 @@ -// REQUIRES: cpu, windows -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath-fp64.obj -o %t.out -// RUN: env SYCL_DEVICE_TYPE=HOST %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %ACC_RUN_PLACEHOLDER %t.out -#include "math_utils.hpp" -#include -#include -#include - -namespace s = cl::sycl; -constexpr s::access::mode sycl_read = s::access::mode::read; -constexpr s::access::mode sycl_write = s::access::mode::write; - -#define TEST_NUM 41 - -double ref_val[TEST_NUM] = { - 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, - 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, - 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, - 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 1, 2, 
0}; - -double refIptr = 1; - -void device_math_test(s::queue &deviceQueue) { - s::range<1> numOfItems{TEST_NUM}; - double result[TEST_NUM] = {-1}; - - // Variable exponent is an integer value to store the exponent in frexp function - int exponent = -1; - - // Variable iptr stores the integral part of float point in modf function - double iptr = -1; - - // Variable quo stores the sign and some bits of x/y in remquo function - int quo = -1; - - // Varaible enm stores the enum value retured by MSVC function - short enm[2] = {10, 10}; - { - s::buffer buffer1(result, numOfItems); - s::buffer buffer2(&exponent, s::range<1>{1}); - s::buffer buffer3(&iptr, s::range<1>{1}); - s::buffer buffer4(&quo, s::range<1>{1}); - s::buffer buffer5(enm, s::range<1>{2}); - deviceQueue.submit([&](cl::sycl::handler &cgh) { - auto res_access = buffer1.template get_access(cgh); - auto exp_access = buffer2.template get_access(cgh); - auto iptr_access = buffer3.template get_access(cgh); - auto quo_access = buffer4.template get_access(cgh); - auto enm_access = buffer5.template get_access(cgh); - cgh.single_task([=]() { - int i = 0; - res_access[i++] = cos(0.0); - res_access[i++] = sin(0.0); - res_access[i++] = log(1.0); - res_access[i++] = acos(1.0); - res_access[i++] = asin(0.0); - res_access[i++] = atan(0.0); - res_access[i++] = atan2(0.0, 1.0); - res_access[i++] = cosh(0.0); - res_access[i++] = exp(0.0); - res_access[i++] = fmod(1.5, 1.0); - res_access[i++] = frexp(0.0, &exp_access[0]); - res_access[i++] = ldexp(1.0, 1); - res_access[i++] = log10(1.0); - res_access[i++] = modf(1.0, &iptr_access[0]); - res_access[i++] = pow(1.0, 1.0); - res_access[i++] = sinh(0.0); - res_access[i++] = sqrt(4.0); - res_access[i++] = tan(0.0); - res_access[i++] = tanh(0.0); - res_access[i++] = acosh(1.0); - res_access[i++] = asinh(0.0); - res_access[i++] = atanh(0.0); - res_access[i++] = cbrt(1.0); - res_access[i++] = erf(0.0); - res_access[i++] = erfc(0.0); - res_access[i++] = exp2(1.0); - res_access[i++] = 
expm1(0.0); - res_access[i++] = fdim(1.0, 0.0); - res_access[i++] = fma(1.0, 1.0, 1.0); - res_access[i++] = hypot(3.0, 4.0); - res_access[i++] = ilogb(1.0); - res_access[i++] = log1p(0.0); - res_access[i++] = log2(1.0); - res_access[i++] = logb(1.0); - res_access[i++] = remainder(0.5, 1.0); - res_access[i++] = remquo(0.5, 1.0, &quo_access[0]); - double a = NAN; - res_access[i++] = tgamma(a); - res_access[i++] = lgamma(a); - enm_access[0] = _Dtest(&a); - a = 0.0; - enm_access[1] = _Exp(&a, 1.0, 0); - res_access[i++] = a; - res_access[i++] = _Cosh(0.0, 2.0); - res_access[i++] = _Sinh(0.0, 1.0); - }); - }); - } - - // Compare result with reference - for (int i = 0; i < TEST_NUM; ++i) { - assert(approx_equal_fp(result[i], ref_val[i])); - } - - // Test modf integral part - assert(approx_equal_fp(iptr, refIptr)); - - // Test frexp exponent - assert(exponent == 0); - - // Test remquo sign - assert(quo == 0); - - // Test enum value returned by _Dtest - assert(enm[0] == _NANCODE); - - // Test enum value returned by _Exp - assert(enm[1] == _FINITE); -} - -int main() { - s::queue deviceQueue; - if (deviceQueue.get_device().has_extension("cl_khr_fp64")) { - device_math_test(deviceQueue); - std::cout << "Pass" << std::endl; - } - return 0; -} diff --git a/sycl/test/devicelib/math_override_test.cpp b/sycl/test/devicelib/math_override_test.cpp index e634bf77fbd37..b3014f9bea884 100644 --- a/sycl/test/devicelib/math_override_test.cpp +++ b/sycl/test/devicelib/math_override_test.cpp @@ -1,6 +1,5 @@ // UNSUPPORTED: windows -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath.o -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out -fno-builtin // RUN: env SYCL_DEVICE_TYPE=HOST %t.out #include #include @@ -16,6 +15,8 @@ constexpr s::access::mode sycl_write = s::access::mode::write; SYCL_EXTERNAL extern "C" float sinf(float x) { return x + 100.f; } +SYCL_EXTERNAL +extern "C" float cosf(float x); class DeviceTest; void device_test() { @@ -37,7 
+38,6 @@ void device_test() { }); }); } - assert(approx_equal_fp(result_sin, 100.f) && approx_equal_fp(result_cos, 1.f)); } diff --git a/sycl/test/devicelib/math_test.cpp b/sycl/test/devicelib/math_test.cpp index 1e3885960b2c5..38d9a8f081bd9 100644 --- a/sycl/test/devicelib/math_test.cpp +++ b/sycl/test/devicelib/math_test.cpp @@ -1,9 +1,8 @@ -// REQUIRES: cpu, linux -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath.o -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out + #include "math_utils.hpp" #include #include @@ -13,13 +12,11 @@ namespace s = cl::sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 38 +#define TEST_NUM 36 -float ref_val[TEST_NUM] = { - 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, - 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, - 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, - 0, 0, 0, 0, 0.5, 0.5, NAN, NAN}; +float ref_val[TEST_NUM] = {1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0, + 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, + 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN}; float refIptr = 1; @@ -27,9 +24,6 @@ void device_math_test(s::queue &deviceQueue) { s::range<1> numOfItems{TEST_NUM}; float result[TEST_NUM] = {-1}; - // Variable exponent is an integer value to store the exponent in frexp function - int exponent = -1; - // Variable iptr stores the integral part of float point in modf function float iptr = -1; @@ -37,14 +31,12 @@ void device_math_test(s::queue &deviceQueue) { int quo = -1; { s::buffer buffer1(result, numOfItems); - s::buffer buffer2(&exponent, s::range<1>{1}); - s::buffer buffer3(&iptr, s::range<1>{1}); - s::buffer buffer4(&quo, s::range<1>{1}); + s::buffer buffer2(&iptr, s::range<1>{1}); + s::buffer buffer3(&quo, s::range<1>{1}); deviceQueue.submit([&](cl::sycl::handler &cgh) { auto res_access = buffer1.template get_access(cgh); - auto 
exp_access = buffer2.template get_access(cgh); - auto iptr_access = buffer3.template get_access(cgh); - auto quo_access = buffer4.template get_access(cgh); + auto iptr_access = buffer2.template get_access(cgh); + auto quo_access = buffer3.template get_access(cgh); cgh.single_task([=]() { int i = 0; res_access[i++] = cosf(0.0f); @@ -57,8 +49,6 @@ void device_math_test(s::queue &deviceQueue) { res_access[i++] = coshf(0.0f); res_access[i++] = expf(0.0f); res_access[i++] = fmodf(1.5f, 1.0f); - res_access[i++] = frexpf(0.0f, &exp_access[0]); - res_access[i++] = ldexpf(1.0f, 1); res_access[i++] = log10f(1.0f); res_access[i++] = modff(1.0f, &iptr_access[0]); res_access[i++] = powf(1.0f, 1.0f); @@ -98,9 +88,6 @@ void device_math_test(s::queue &deviceQueue) { // Test modf integral part assert(approx_equal_fp(iptr, refIptr)); - // Test frexp exponent - assert(exponent == 0); - // Test remquo sign assert(quo == 0); } diff --git a/sycl/test/devicelib/math_utils.hpp b/sycl/test/devicelib/math_utils.hpp index eb4f5cae07007..0e4c045fe208e 100644 --- a/sycl/test/devicelib/math_utils.hpp +++ b/sycl/test/devicelib/math_utils.hpp @@ -1,6 +1,11 @@ #ifndef MATH_UTILS #include #include +// _USE_MATH_DEFINES must be defined in order to use math constants in MSVC +#ifdef _WIN32 +#define _USE_MATH_DEFINES 1 +#include +#endif // Since it is not proper to compare float point using operator ==, this // function measures whether the result of cmath function from kernel is diff --git a/sycl/test/devicelib/math_windows_test.cpp b/sycl/test/devicelib/math_windows_test.cpp deleted file mode 100644 index fd5f2920cb949..0000000000000 --- a/sycl/test/devicelib/math_windows_test.cpp +++ /dev/null @@ -1,121 +0,0 @@ -// REQUIRES: cpu, windows -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath.obj -o %t.out -// RUN: env SYCL_DEVICE_TYPE=HOST %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %ACC_RUN_PLACEHOLDER %t.out -#include "math_utils.hpp" -#include 
-#include -#include - -namespace s = cl::sycl; -constexpr s::access::mode sycl_read = s::access::mode::read; -constexpr s::access::mode sycl_write = s::access::mode::write; - -#define TEST_NUM 39 - -float ref_val[TEST_NUM] = { - 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, - 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, - 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, - 0, 0, 0.5, 0.5, NAN, NAN, 1, 2, 0}; - -float refIptr = 1; - -void device_math_test(s::queue &deviceQueue) { - s::range<1> numOfItems{TEST_NUM}; - float result[TEST_NUM] = {-1}; - - // Variable iptr stores the integral part of float point in modf function - float iptr = -1; - - // Variable quo stores the sign and some bits of x/y in remquo function - int quo = -1; - - // Varaible enm stores the enum value retured by MSVC function - short enm[2] = {10, 10}; - - { - s::buffer buffer1(result, numOfItems); - s::buffer buffer2(&iptr, s::range<1>{1}); - s::buffer buffer3(&quo, s::range<1>{1}); - s::buffer buffer4(enm, s::range<1>{2}); - deviceQueue.submit([&](cl::sycl::handler &cgh) { - auto res_access = buffer1.template get_access(cgh); - auto iptr_access = buffer2.template get_access(cgh); - auto quo_access = buffer3.template get_access(cgh); - auto enm_access = buffer4.template get_access(cgh); - cgh.single_task([=]() { - int i = 0; - res_access[i++] = cosf(0.0f); - res_access[i++] = sinf(0.0f); - res_access[i++] = logf(1.0f); - res_access[i++] = acosf(1.0f); - res_access[i++] = asinf(0.0f); - res_access[i++] = atanf(0.0f); - res_access[i++] = atan2f(0.0f, 1.0f); - res_access[i++] = coshf(0.0f); - res_access[i++] = expf(0.0f); - res_access[i++] = fmodf(1.5f, 1.0f); - res_access[i++] = log10f(1.0f); - res_access[i++] = modff(1.0f, &iptr_access[0]); - res_access[i++] = powf(1.0f, 1.0f); - res_access[i++] = sinhf(0.0f); - res_access[i++] = sqrtf(4.0f); - res_access[i++] = tanf(0.0f); - res_access[i++] = tanhf(0.0f); - res_access[i++] = acoshf(1.0f); - res_access[i++] = asinhf(0.0f); - res_access[i++] = atanhf(0.0f); - res_access[i++] = cbrtf(1.0f); - 
res_access[i++] = erff(0.0f); - res_access[i++] = erfcf(0.0f); - res_access[i++] = exp2f(1.0f); - res_access[i++] = expm1f(0.0f); - res_access[i++] = fdimf(1.0f, 0.0f); - res_access[i++] = fmaf(1.0f, 1.0f, 1.0f); - res_access[i++] = hypotf(3.0f, 4.0f); - res_access[i++] = ilogbf(1.0f); - res_access[i++] = log1pf(0.0f); - res_access[i++] = log2f(1.0f); - res_access[i++] = logbf(1.0f); - res_access[i++] = remainderf(0.5f, 1.0f); - res_access[i++] = remquof(0.5f, 1.0f, &quo_access[0]); - float a = NAN; - res_access[i++] = tgammaf(a); - res_access[i++] = lgammaf(a); - enm_access[0] = _FDtest(&a); - a = 0.0f; - enm_access[1] = _FExp(&a, 1.0f, 0); - res_access[i++] = a; - res_access[i++] = _FCosh(0.0f, 2.0f); - res_access[i++] = _FSinh(0.0f, 1.0f); - }); - }); - } - - // Compare result with reference - for (int i = 0; i < TEST_NUM; ++i) { - assert(approx_equal_fp(result[i], ref_val[i])); - } - - // Test modf integral part - assert(approx_equal_fp(iptr, refIptr)); - - // Test remquo sign - assert(quo == 0); - - // Test enum value returned by _FDtest - assert(enm[0] == _NANCODE); - - // Test enum value returned by _FExp - assert(enm[1] == _FINITE); -} - -int main() { - s::queue deviceQueue; - device_math_test(deviceQueue); - std::cout << "Pass" << std::endl; - return 0; -} diff --git a/sycl/test/devicelib/std_complex_math_fp64_test.cpp b/sycl/test/devicelib/std_complex_math_fp64_test.cpp index 40eeadfd04321..fd609db28b6f7 100644 --- a/sycl/test/devicelib/std_complex_math_fp64_test.cpp +++ b/sycl/test/devicelib/std_complex_math_fp64_test.cpp @@ -1,6 +1,4 @@ -// UNSUPPORTED: windows -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-complex-fp64.o %sycl_libs_dir/libsycl-cmath-fp64.o -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-lib=libm-fp64 %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -17,8 +15,7 @@ namespace s = cl::sycl; constexpr s::access::mode 
sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -template -bool approx_equal_cmplx(complex x, complex y) { +template bool approx_equal_cmplx(complex x, complex y) { return approx_equal_fp(x.real(), y.real()) && approx_equal_fp(x.imag(), y.imag()); } @@ -86,8 +83,8 @@ std::array, TestArraySize1> ref1_results = { complex(M_PI_2, 0.), complex(M_PI_2, 0.549306144334055)}; -std::array ref2_results = {0., 25., 169., INFINITY, 0., - 5., 13., INFINITY, 0., M_PI_2}; +std::array ref2_results = { + 0., 25., 169., INFINITY, 0., 5., 13., INFINITY, 0., M_PI_2}; void device_complex_test(s::queue &deviceQueue) { s::range<1> numOfItems1{TestArraySize1}; diff --git a/sycl/test/devicelib/std_complex_math_test.cpp b/sycl/test/devicelib/std_complex_math_test.cpp index b2578076243ff..74f63bcf60387 100644 --- a/sycl/test/devicelib/std_complex_math_test.cpp +++ b/sycl/test/devicelib/std_complex_math_test.cpp @@ -1,6 +1,4 @@ -// UNSUPPORTED: windows -// RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-complex.o %sycl_libs_dir/libsycl-cmath.o -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -17,79 +15,53 @@ namespace s = cl::sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -template -bool approx_equal_cmplx(complex x, complex y) { +template bool approx_equal_cmplx(complex x, complex y) { return approx_equal_fp(x.real(), y.real()) && approx_equal_fp(x.imag(), y.imag()); } -static constexpr auto TestArraySize1 = 57; +static constexpr auto TestArraySize1 = 41; static constexpr auto TestArraySize2 = 10; +static constexpr auto TestArraySize3 = 16; std::array, TestArraySize1> ref1_results = { - complex(-1.f, 1.f), - complex(1.f, 3.f), - complex(-2.f, 10.f), - complex(-8.f, 31.f), - complex(1.f, 1.f), - complex(2.f, 
1.f), - complex(2.f, 2.f), - complex(3.f, 4.f), - complex(2.f, 1.f), - complex(0.f, 1.f), - complex(2.f, 0.f), - complex(0.f, 0.f), - complex(0.f, 1.f), - complex(1.f, 1.f), - complex(2.f, 0.f), - complex(2.f, 3.f), - complex(1.f, 0.f), - complex(0.f, 1.f), - complex(-1.f, 0.f), - complex(0.f, M_E), - complex(0.f, 0.f), - complex(0.f, M_PI_2), - complex(0.f, M_PI), - complex(1.f, M_PI_2), - complex(0.f, 0.f), - complex(1.f, 0.f), - complex(1.f, 0.f), - complex(-1.f, 0.f), - complex(-INFINITY, 0.f), - complex(1.f, 0.f), - complex(10.f, 0.f), - complex(100.f, 0.f), - complex(200.f, 0.f), - complex(1.f, 2.f), - complex(INFINITY, 0.f), - complex(INFINITY, 0.f), - complex(0.f, 1.f), - complex(M_PI_2, 0.f), - complex(0.f, 0.f), - complex(1.f, 0.f), - complex(INFINITY, 0.f), - complex(0.f, 0.f), - complex(1.f, 0.f), - complex(0.f, 0.f), - complex(INFINITY, M_PI_2), - complex(INFINITY, 0.f), - complex(0.f, M_PI_2), - complex(INFINITY, M_PI_2), - complex(INFINITY, 0.f), - complex(0.f, 0.f), - complex(0.f, M_PI_2), - - complex(1.f, -4.f), - complex(18.f, -7.f), - complex(1.557408f, 0.f), - complex(0.f, 0.761594f), - complex(M_PI_2, 0.f), + complex(-1.f, 1.f), complex(1.f, 3.f), + complex(-2.f, 10.f), complex(-8.f, 31.f), + complex(1.f, 1.f), complex(2.f, 1.f), + complex(2.f, 2.f), complex(3.f, 4.f), + complex(2.f, 1.f), complex(0.f, 1.f), + complex(2.f, 0.f), complex(0.f, 0.f), + complex(1.f, 0.f), complex(0.f, 1.f), + complex(-1.f, 0.f), complex(0.f, M_E), + complex(0.f, 0.f), complex(0.f, M_PI_2), + complex(0.f, M_PI), complex(1.f, M_PI_2), + complex(0.f, 0.f), complex(1.f, 0.f), + complex(1.f, 0.f), complex(-1.f, 0.f), + complex(-INFINITY, 0.f), complex(1.f, 0.f), + complex(10.f, 0.f), complex(100.f, 0.f), + complex(200.f, 0.f), complex(1.f, 2.f), + complex(INFINITY, 0.f), complex(INFINITY, 0.f), + complex(0.f, 1.f), complex(0.f, 0.f), + complex(1.f, 0.f), complex(INFINITY, 0.f), + complex(0.f, 0.f), complex(0.f, M_PI_2), + complex(1.f, -4.f), complex(18.f, -7.f), 
complex(M_PI_2, 0.549306f)}; -std::array ref2_results = {0.f, 25.f, 169.f, INFINITY, 0.f, - 5.f, 13.f, INFINITY, 0.f, M_PI_2}; +std::array ref2_results = { + 0.f, 25.f, 169.f, INFINITY, 0.f, 5.f, 13.f, INFINITY, 0.f, M_PI_2}; + +std::array, TestArraySize3> ref3_results = { + complex(0.f, 1.f), complex(1.f, 1.f), + complex(2.f, 0.f), complex(2.f, 3.f), + complex(M_PI_2, 0.f), complex(0.f, 0.f), + complex(1.f, 0.f), complex(0.f, 0.f), + complex(INFINITY, M_PI_2), complex(INFINITY, 0.f), + complex(0.f, M_PI_2), complex(INFINITY, M_PI_2), + complex(INFINITY, 0.f), complex(1.557408f, 0.f), + complex(0.f, 0.761594f), complex(M_PI_2, 0.f), -void device_complex_test(s::queue &deviceQueue) { +}; +void device_complex_test_1(s::queue &deviceQueue) { s::range<1> numOfItems1{TestArraySize1}; s::range<1> numOfItems2{TestArraySize2}; std::array, TestArraySize1> result1; @@ -126,10 +98,6 @@ void device_complex_test(s::queue &deviceQueue) { complex(0.f, 10.f) / complex(0.f, 5.f); buf_out1_access[index++] = complex(0.f, 0.f) / complex(1.f, 0.f); - buf_out1_access[index++] = std::sqrt(complex(-1.f, 0.f)); - buf_out1_access[index++] = std::sqrt(complex(0.f, 2.f)); - buf_out1_access[index++] = std::sqrt(complex(4.f, 0.f)); - buf_out1_access[index++] = std::sqrt(complex(-5.f, 12.f)); buf_out1_access[index++] = std::exp(complex(0.f, 0.f)); buf_out1_access[index++] = std::exp(complex(0.f, M_PI_2)); buf_out1_access[index++] = std::exp(complex(0.f, M_PI)); @@ -151,25 +119,13 @@ void device_complex_test(s::queue &deviceQueue) { buf_out1_access[index++] = std::proj(complex(INFINITY, -1.f)); buf_out1_access[index++] = std::proj(complex(0.f, -INFINITY)); buf_out1_access[index++] = std::pow(complex(-1.f, 0.f), 0.5f); - buf_out1_access[index++] = std::acos(complex(0.f, 0.f)); buf_out1_access[index++] = std::sinh(complex(0.f, 0.f)); buf_out1_access[index++] = std::cosh(complex(0.f, 0.f)); buf_out1_access[index++] = std::cosh(complex(INFINITY, 0.f)); - buf_out1_access[index++] = 
std::tanh(complex(0.f, 0.f)); - buf_out1_access[index++] = std::tanh(complex(INFINITY, 1.f)); - buf_out1_access[index++] = std::asinh(complex(0.f, 0.f)); - buf_out1_access[index++] = std::asinh(complex(1.f, INFINITY)); - buf_out1_access[index++] = std::asinh(complex(INFINITY, 1.f)); - buf_out1_access[index++] = std::acosh(complex(0.f, 0.f)); - buf_out1_access[index++] = std::acosh(complex(1.f, INFINITY)); - buf_out1_access[index++] = std::acosh(complex(INFINITY, 1.f)); buf_out1_access[index++] = std::atanh(complex(0.f, 0.f)); buf_out1_access[index++] = std::atanh(complex(1.f, INFINITY)); buf_out1_access[index++] = std::conj(complex(1.f, 4.f)); buf_out1_access[index++] = std::conj(complex(18.f, 7.f)); - buf_out1_access[index++] = std::tan(complex(1.f, 0.f)); - buf_out1_access[index++] = std::tan(complex(0.f, 1.f)); - buf_out1_access[index++] = std::asin(complex(1.f, 0.f)); buf_out1_access[index++] = std::atan(complex(0.f, 2.f)); index = 0; @@ -195,8 +151,50 @@ void device_complex_test(s::queue &deviceQueue) { } } +// The MSVC implementation of some complex math functions depends on +// some 'double' C math functions such as ldexp, those complex math +// functions can only work on Windows with fp64 extension support from +// underlying device. 
+#ifndef _WIN32 +void device_complex_test_2(s::queue &deviceQueue) { + s::range<1> numOfItems1{TestArraySize3}; + std::array, TestArraySize3> result3; + { + s::buffer, 1> buffer1(result3.data(), numOfItems1); + deviceQueue.submit([&](s::handler &cgh) { + auto buf_out1_access = buffer1.get_access(cgh); + cgh.single_task([=]() { + int index = 0; + buf_out1_access[index++] = std::sqrt(complex(-1.f, 0.f)); + buf_out1_access[index++] = std::sqrt(complex(0.f, 2.f)); + buf_out1_access[index++] = std::sqrt(complex(4.f, 0.f)); + buf_out1_access[index++] = std::sqrt(complex(-5.f, 12.f)); + buf_out1_access[index++] = std::acos(complex(0.f, 0.f)); + buf_out1_access[index++] = std::tanh(complex(0.f, 0.f)); + buf_out1_access[index++] = std::tanh(complex(INFINITY, 1.f)); + buf_out1_access[index++] = std::asinh(complex(0.f, 0.f)); + buf_out1_access[index++] = std::asinh(complex(1.f, INFINITY)); + buf_out1_access[index++] = std::asinh(complex(INFINITY, 1.f)); + buf_out1_access[index++] = std::acosh(complex(0.f, 0.f)); + buf_out1_access[index++] = std::acosh(complex(1.f, INFINITY)); + buf_out1_access[index++] = std::acosh(complex(INFINITY, 1.f)); + buf_out1_access[index++] = std::tan(complex(1.f, 0.f)); + buf_out1_access[index++] = std::tan(complex(0.f, 1.f)); + buf_out1_access[index++] = std::asin(complex(1.f, 0.f)); + }); + }); + } + + for (size_t idx = 0; idx < TestArraySize3; ++idx) { + assert(approx_equal_cmplx(result3[idx], ref3_results[idx])); + } +} +#endif int main() { s::queue deviceQueue; - device_complex_test(deviceQueue); + device_complex_test_1(deviceQueue); +#ifndef _WIN32 + device_complex_test_2(deviceQueue); +#endif std::cout << "Pass" << std::endl; } diff --git a/sycl/test/spec_const/spec_const_redefine.cpp b/sycl/test/spec_const/spec_const_redefine.cpp index fc5e7dcb22ac1..e6413cd92dd8e 100644 --- a/sycl/test/spec_const/spec_const_redefine.cpp +++ b/sycl/test/spec_const/spec_const_redefine.cpp @@ -105,9 +105,9 @@ int main(int argc, char **argv) { } // --- Check 
that only two JIT compilation happened: -// CHECK-NOT: ---> piProgramLink -// CHECK: ---> piProgramLink -// CHECK: ---> piProgramLink -// CHECK-NOT: ---> piProgramLink +// CHECK-NOT: ---> piProgramBuild +// CHECK: ---> piProgramBuild +// CHECK: ---> piProgramBuild +// CHECK-NOT: ---> piProgramBuild // --- Check that the test completed with expected results: // CHECK: passed From e1646809f7afdbfbd195937029d2544233c30cc2 Mon Sep 17 00:00:00 2001 From: sergei <57672082+s-kanaev@users.noreply.github.com> Date: Tue, 15 Sep 2020 14:31:50 +0300 Subject: [PATCH 406/465] [SYCL][Doc] Add link to use pinned memory spec (#2463) Signed-off-by: Sergey Kanaev --- sycl/doc/extensions/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/doc/extensions/README.md b/sycl/doc/extensions/README.md index 994cef7185513..551e3bf1fbfb1 100644 --- a/sycl/doc/extensions/README.md +++ b/sycl/doc/extensions/README.md @@ -34,6 +34,7 @@ DPC++ extensions status: | [Sub-groups](SubGroup/SYCL_INTEL_sub_group.asciidoc) | Supported(OpenCL) | | | [SYCL_INTEL_unnamed_kernel_lambda](UnnamedKernelLambda/SYCL_INTEL_unnamed_kernel_lambda.asciidoc) | Supported(OpenCL) | | | [Unified Shared Memory](USM/USM.adoc) | Supported(OpenCL) | | +| [Use Pinned Memory Property](UsePinnedMemoryProperty/UsePinnedMemoryPropery.adoc) | Supported | | Legend: From 3833943fc44b613af7d882cdfefcedfe4f409ee4 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Tue, 15 Sep 2020 17:29:49 +0300 Subject: [PATCH 407/465] [SYCL] Unroll several loops in __init method accessor class (#2449) It might improve performance on several platforms. 
Co-author: Mark Mendell Signed-off-by: Dmitry Sidorov --- sycl/include/CL/sycl/accessor.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index dd4f7fc09be01..bce13bf54a7e7 100755 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -851,6 +851,7 @@ class accessor : void __init(ConcreteASPtrType Ptr, range AccessRange, range MemRange, id Offset) { MData = Ptr; +#pragma unroll for (int I = 0; I < AdjustedDim; ++I) { getOffset()[I] = Offset[I]; getAccessRange()[I] = AccessRange[I]; @@ -1388,6 +1389,7 @@ class accessor AccessRange, range, id) { MData = Ptr; +#pragma unroll for (int I = 0; I < AdjustedDim; ++I) getSize()[I] = AccessRange[I]; } From 93081e1db4b47f5f27dfb2ae5da448c947f8add9 Mon Sep 17 00:00:00 2001 From: Neil Spruit Date: Tue, 15 Sep 2020 11:19:38 -0700 Subject: [PATCH 408/465] [SYCL][L0]: Check Queue refcnt prior to using members in event wait/release (#2471) - If the Queue was cleared of L0 data structures ie Refcnt == 0 then all L0 data structures in the pi_queue can no longer be used. - Prevent EventWait and EventRelease from using invalid data structures if the Queue has already been cleared. Signed-off-by: Spruit, Neil R --- sycl/plugins/level_zero/pi_level_zero.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index ae141b3c1f78e..8bd1bc17c2bcb 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -3018,16 +3018,18 @@ pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { // Event has been signaled: If the fence for the associated command list // is signalled, then reset the fence and command list and add them to the // available list for reuse in PI calls. 
- EventList[I]->Queue->ZeCommandListFenceMapMutex.lock(); - ze_result_t ZeResult = ZE_CALL_NOCHECK(zeFenceQueryStatus( - EventList[I] - ->Queue->ZeCommandListFenceMap[EventList[I]->ZeCommandList])); - if (ZeResult == ZE_RESULT_SUCCESS) { - EventList[I]->Queue->resetCommandListFenceEntry( - EventList[I]->ZeCommandList, true); - EventList[I]->ZeCommandList = nullptr; + if (EventList[I]->Queue->RefCount > 0) { + EventList[I]->Queue->ZeCommandListFenceMapMutex.lock(); + ze_result_t ZeResult = ZE_CALL_NOCHECK(zeFenceQueryStatus( + EventList[I] + ->Queue->ZeCommandListFenceMap[EventList[I]->ZeCommandList])); + if (ZeResult == ZE_RESULT_SUCCESS) { + EventList[I]->Queue->resetCommandListFenceEntry( + EventList[I]->ZeCommandList, true); + EventList[I]->ZeCommandList = nullptr; + } + EventList[I]->Queue->ZeCommandListFenceMapMutex.unlock(); } - EventList[I]->Queue->ZeCommandListFenceMapMutex.unlock(); } } return PI_SUCCESS; @@ -3059,7 +3061,7 @@ pi_result piEventRelease(pi_event Event) { // If the fence associated with this command list has signalled, then // Reset the Command List Used in this event and put it back on the // available list. - if (Event->Queue->ZeCommandQueue) { + if (Event->Queue->RefCount > 0) { Event->Queue->ZeCommandListFenceMapMutex.lock(); ze_result_t ZeResult = ZE_CALL_NOCHECK(zeFenceQueryStatus( Event->Queue->ZeCommandListFenceMap[Event->ZeCommandList])); From 244e874bbea9bee1ccdd7a08d0e7ba0218c2fda1 Mon Sep 17 00:00:00 2001 From: mdtoguchi <47896532+mdtoguchi@users.noreply.github.com> Date: Wed, 16 Sep 2020 01:29:40 -0700 Subject: [PATCH 409/465] [Driver][SYCL] Improve situations where .exe is added for AOT tools (#2467) Previous fix only allowed for the .exe to be added for 'clang-cl' specific compilations, which did not allow for usage using clang with a windows specific target. 
--- clang/lib/Driver/ToolChains/SYCL.cpp | 3 ++- clang/test/Driver/sycl-offload.c | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 9b97c004fd2ab..4ed2ce29791c2 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -207,7 +207,8 @@ void SYCL::Linker::ConstructJob(Compilation &C, const JobAction &JA, static const char *makeExeName(Compilation &C, StringRef Name) { llvm::SmallString<8> ExeName(Name); - if (C.getDriver().IsCLMode()) + const ToolChain *HostTC = C.getSingleOffloadToolChain(); + if (HostTC->getTriple().isWindowsMSVCEnvironment()) ExeName.append(".exe"); return C.getArgs().MakeArgString(ExeName); } diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 3bf790d87a6ce..983bb4695b690 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -689,10 +689,16 @@ // Check to be sure that for windows, the 'exe' tools are called // RUN: %clang_cl -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-CPU-WIN +// RUN: %clang -target x86_64-pc-windows-msvc -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-CPU-WIN // RUN: %clang_cl -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-GEN-WIN +// RUN: %clang -target x86_64-pc-windows-msvc -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-GEN-WIN // RUN: %clang_cl -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-TOOLS-FPGA-WIN +// RUN: %clang -target x86_64-pc-windows-msvc -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s -### 2>&1 \ +// RUN: 
| FileCheck %s -check-prefixes=CHK-TOOLS-FPGA-WIN // CHK-TOOLS-GEN-WIN: ocloc.exe{{.*}} // CHK-TOOLS-CPU-WIN: opencl-aot.exe{{.*}} // CHK-TOOLS-FPGA-WIN: aoc.exe{{.*}} From e53aa20baea938d4e5c6e9c97fe817dfa19123eb Mon Sep 17 00:00:00 2001 From: mdtoguchi <47896532+mdtoguchi@users.noreply.github.com> Date: Wed, 16 Sep 2020 01:30:27 -0700 Subject: [PATCH 410/465] [Driver][SYCL] Add defaultlib directive for sycl lib (#2464) When compiling for Windows, add the sycl dependent library to the object. If a person were to use /MTd to compile and not use /MTd to link, the default sycl lib would be passed to the linker instead of sycld. --- clang/lib/Driver/ToolChains/Clang.cpp | 10 ++++++++++ clang/lib/Driver/ToolChains/MSVC.cpp | 4 ++-- clang/test/Driver/sycl-offload.c | 10 ++++++---- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b06f75908ff66..9a3771abbc7d7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6819,6 +6819,16 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, // users want. The /Za flag to cl.exe turns this off, but it's not // implemented in clang. 
CmdArgs.push_back("--dependent-lib=oldnames"); + + // Add SYCL dependent library + if (Args.hasArg(options::OPT_fsycl) && + !Args.hasArg(options::OPT_nolibsycl)) { + if (RTOptionID == options::OPT__SLASH_MDd || + RTOptionID == options::OPT__SLASH_MTd) + CmdArgs.push_back("--dependent-lib=sycld"); + else + CmdArgs.push_back("--dependent-lib=sycl"); + } } if (Arg *ShowIncludes = diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 764eb0c965e05..28ed44bb0e2d6 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -373,8 +373,8 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, !C.getDriver().IsCLMode()) CmdArgs.push_back("-defaultlib:libcmt"); - if (!Args.hasArg(options::OPT_nostdlib) && Args.hasArg(options::OPT_fsycl) && - !Args.hasArg(options::OPT_nolibsycl)) { + if (!C.getDriver().IsCLMode() && !Args.hasArg(options::OPT_nostdlib) && + Args.hasArg(options::OPT_fsycl) && !Args.hasArg(options::OPT_nolibsycl)) { if (Args.hasArg(options::OPT__SLASH_MDd) || Args.hasArg(options::OPT__SLASH_MTd)) CmdArgs.push_back("-defaultlib:sycld.lib"); diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 983bb4695b690..5c8aad46b4c12 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -577,21 +577,23 @@ /// Check for default linking of sycl.lib with -fsycl usage // RUN: %clang -fsycl -target x86_64-unknown-windows-msvc %s -o %t -### 2>&1 | FileCheck -check-prefix=CHECK-LINK-SYCL %s -// RUN: %clang_cl -fsycl %s -o %t -### 2>&1 | FileCheck -check-prefix=CHECK-LINK-SYCL %s -// CHECK-LINK-SYCL: "{{.*}}link{{(.exe)?}}" +// RUN: %clang_cl -fsycl %s -o %t -### 2>&1 | FileCheck -check-prefix=CHECK-LINK-SYCL-CL %s +// CHECK-LINK-SYCL-CL: "--dependent-lib=sycl" +// CHECK-LINK-SYCL-CL-NOT: "-defaultlib:sycl.lib" // CHECK-LINK-SYCL: "-defaultlib:sycl.lib" /// Check no SYCL runtime is linked with -nolibsycl // RUN: 
%clang -fsycl -nolibsycl -target x86_64-unknown-windows-msvc %s -o %t -### 2>&1 | FileCheck -check-prefix=CHECK-LINK-NOLIBSYCL %s // RUN: %clang_cl -fsycl -nolibsycl %s -o %t -### 2>&1 | FileCheck -check-prefix=CHECK-LINK-NOLIBSYCL %s +// CHECK-LINK-NOLIBSYCL-NOT: "--dependent-lib=sycl" // CHECK-LINK-NOLIBSYCL: "{{.*}}link{{(.exe)?}}" // CHECK-LINK-NOLIBSYCL-NOT: "-defaultlib:sycl.lib" /// Check sycld.lib is chosen with /MDd and /MTd // RUN: %clang_cl -fsycl /MDd %s -o %t -### 2>&1 | FileCheck -check-prefix=CHECK-LINK-SYCL-DEBUG %s // RUN: %clang_cl -fsycl /MTd %s -o %t -### 2>&1 | FileCheck -check-prefix=CHECK-LINK-SYCL-DEBUG %s -// CHECK-LINK-SYCL-DEBUG: "{{.*}}link{{(.exe)?}}" -// CHECK-LINK-SYCL-DEBUG: "-defaultlib:sycld.lib" +// CHECK-LINK-SYCL-DEBUG: "--dependent-lib=sycld" +// CHECK-LINK-SYCL-DEBUG-NOT: "-defaultlib:sycld.lib" /// Check "-spirv-allow-unknown-intrinsics" option is emitted for llvm-spirv tool for esimd mode // RUN: %clangxx %s -fsycl -fsycl-explicit-simd -### 2>&1 | FileCheck %s --check-prefix=CHK-FSYCL-ESIMD From 5a2fe9ccbd17db1aa64ddfbb267685234aa41038 Mon Sep 17 00:00:00 2001 From: premanandrao <47116977+premanandrao@users.noreply.github.com> Date: Wed, 16 Sep 2020 05:08:36 -0400 Subject: [PATCH 411/465] [SYCL] Emit an aliased function only if it is used (#2430) Signed-off-by: Premanand M Rao --- clang/lib/CodeGen/CodeGenModule.cpp | 65 +++++++++++++++++--- clang/lib/CodeGen/CodeGenModule.h | 5 ++ clang/test/CodeGenSYCL/sycl-device-alias.cpp | 45 ++++++++++++++ 3 files changed, 107 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGenSYCL/sycl-device-alias.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 01bae3b04699c..b98d2027bd647 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2264,6 +2264,30 @@ void CodeGenModule::EmitDeferred() { CurDeclsToEmit.swap(DeferredDeclsToEmit); for (GlobalDecl &D : CurDeclsToEmit) { + const ValueDecl 
*VD = cast(D.getDecl()); + // If emitting for SYCL device, emit the deferred alias + // as well as what it aliases. + if (LangOpts.SYCLIsDevice) { + if (AliasAttr *Attr = VD->getAttr()) { + StringRef AliaseeName = Attr->getAliasee(); + auto DDI = DeferredDecls.find(AliaseeName); + // Emit what is aliased first. + if (DDI != DeferredDecls.end()) { + llvm::GlobalValue *AliaseeGV = dyn_cast( + GetAddrOfGlobal(DDI->second, ForDefinition)); + if (!AliaseeGV) + AliaseeGV = GetGlobalValue(getMangledName(DDI->second)); + assert(AliaseeGV); + EmitGlobalDefinition(DDI->second, AliaseeGV); + // Remove the entry just added to the DeferredDeclsToEmit + // since we have emitted it. + DeferredDeclsToEmit.pop_back(); + } + // Now emit the alias itself. + EmitAliasDefinition(D); + continue; + } + } // We should call GetAddrOfGlobal with IsForDefinition set to true in order // to get GlobalValue with exactly the type we need, not something that // might had been created for another decl with the same mangled name but @@ -2296,6 +2320,20 @@ void CodeGenModule::EmitDeferred() { // Otherwise, emit the definition and move on to the next one. EmitGlobalDefinition(D, GV); + if (LangOpts.SYCLIsDevice) { + // If there are any aliases deferred for this, emit those now. + for (auto It = DeferredAliases.begin(); It != DeferredAliases.end(); + /*no increment*/) { + const ValueDecl *Global = cast(It->second.getDecl()); + if (It->first == getMangledName(D)) { + EmitAliasDefinition(Global); + It = DeferredAliases.erase(It); + } else { + ++It; + } + } + } + // If we found out that we need to emit more decls, do that recursively. // This has the advantage that the decls are emitted in a DFS and related // ones are close together, which is convenient for testing. @@ -2619,9 +2657,19 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { return; // If this is an alias definition (which otherwise looks like a declaration) - // emit it now. 
- if (Global->hasAttr()) - return EmitAliasDefinition(GD); + // handle it now. + if (AliasAttr *Attr = Global->getAttr()) { + // Emit the alias here if it is not SYCL device compilation. + if (!LangOpts.SYCLIsDevice) + return EmitAliasDefinition(GD); + // Defer for SYCL devices, until either the alias or what it aliases + // is used. + StringRef MangledName = getMangledName(GD); + DeferredDecls[MangledName] = GD; + StringRef AliaseeName = Attr->getAliasee(); + DeferredAliases[AliaseeName] = GD; + return; + } // IFunc like an alias whose value is resolved at runtime by calling resolver. if (Global->hasAttr()) @@ -4836,20 +4884,21 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { // if a deferred decl. llvm::Constant *Aliasee; llvm::GlobalValue::LinkageTypes LT; + unsigned AS; if (isa(DeclTy)) { Aliasee = GetOrCreateLLVMFunction(AA->getAliasee(), DeclTy, GD, /*ForVTable=*/false); LT = getFunctionLinkage(GD); + AS = Aliasee->getType()->getPointerAddressSpace(); } else { - Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), - llvm::PointerType::getUnqual(DeclTy), + AS = ArgInfoAddressSpace(GetGlobalVarAddressSpace(/*D=*/nullptr)); + Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy->getPointerTo(AS), /*D=*/nullptr); LT = getLLVMLinkageVarDefinition(cast(GD.getDecl()), D->getType().isConstQualified()); } // Create the new alias itself, but don't set a name yet. - unsigned AS = Aliasee->getType()->getPointerAddressSpace(); auto *GA = llvm::GlobalAlias::create(DeclTy, AS, LT, "", Aliasee, &getModule()); @@ -4870,8 +4919,8 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { // Remove it and replace uses of it with the alias. 
GA->takeName(Entry); - Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GA, - Entry->getType())); + Entry->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(GA, Entry->getType())); Entry->eraseFromParent(); } else { GA->setName(MangledName); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 2037571f38829..cac10a97e7106 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -345,6 +345,11 @@ class CodeGenModule : public CodeGenTypeCache { /// yet. std::map DeferredDecls; + /// This contains all the aliases that are deferred for emission until + /// they or what they alias are actually used. Note that the StringRef + /// associated in this map is that of the aliasee. + std::map DeferredAliases; + /// This is a list of deferred decls which we have seen that *are* actually /// referenced. These get code generated when the module is done. std::vector DeferredDeclsToEmit; diff --git a/clang/test/CodeGenSYCL/sycl-device-alias.cpp b/clang/test/CodeGenSYCL/sycl-device-alias.cpp new file mode 100644 index 0000000000000..3a124901b471d --- /dev/null +++ b/clang/test/CodeGenSYCL/sycl-device-alias.cpp @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// Test that aliasing does not force an unused entity to be emitted + +// CHECK-NOT: define spir_func void @unused_func() +extern "C" void unused_func() {} +// CHECK-NOT: @unused_aliaser +extern "C" void unused_aliaser() __attribute__((alias("unused_func"))); +// CHECK-NOT: @unused_int +int unused_int = 3; +// CHECK-NOT: @alias_unused_int +extern int alias_unused_int __attribute__((alias("unused_int"))); + +// CHECK-DAG: define spir_func void @used_func() +extern "C" void used_func() {} +// CHECK-DAG: @aliaser = alias void (), void ()* @used_func +extern "C" void aliaser() __attribute__((alias("used_func"))); + +// CHECK-DAG: define spir_func 
void @func() +extern "C" void func() {} +// CHECK-DAG: @used_aliaser = alias void (), void ()* @func +extern "C" void used_aliaser() __attribute__((alias("func"))); + +// CHECK-DAG: @used_int = addrspace(1) constant i32 5, align 4 +extern "C" const int used_int = 5; +// CHECK-DAG: @alias_used_int = alias i32, i32 addrspace(1)* @used_int +extern "C" const int alias_used_int __attribute__((alias("used_int"))); +// CHECK-DAG: @vint = addrspace(1) constant i32 7, align 4 +extern "C" const int vint = 7; +// CHECK-DAG: @used_alias_used_int = alias i32, i32 addrspace(1)* @vint +extern "C" const int used_alias_used_int __attribute__((alias("vint"))); + +// CHECK-DAG: define spir_func void @{{.*}}bar{{.*}} +void bar(const int &i) {} + +// CHECK-DAG: define spir_func void @{{.*}}foo{{.*}} +void __attribute__((sycl_device)) foo() { + // CHECK-DAG: call spir_func void @{{.*}}bar{{.*}}@used_int + bar(used_int); + // CHECK-DAG: call spir_func void @{{.*}}bar{{.*}}@used_alias_used_int + bar(used_alias_used_int); + // CHECK-DAG: call spir_func void @used_func() + used_func(); + // CHECK-DAG: call spir_func void @used_aliaser() + used_aliaser(); +} From ac42d44ec6abb2a4acfc417c74b59890c2a375ba Mon Sep 17 00:00:00 2001 From: Neil Spruit Date: Wed, 16 Sep 2020 03:17:25 -0700 Subject: [PATCH 412/465] [SYCL][L0] Kernel Destroy in piKernelRelease (#2475) Signed-off-by: Spruit, Neil R --- sycl/plugins/level_zero/pi_level_zero.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 8bd1bc17c2bcb..46dff5d505dff 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2779,6 +2779,7 @@ pi_result piKernelRelease(pi_kernel Kernel) { assert(Kernel); if (--(Kernel->RefCount) == 0) { + zeKernelDestroy(Kernel->ZeKernel); delete Kernel; } return PI_SUCCESS; From 628424a2be2088633f1bd6c26d075c7003396a61 Mon Sep 17 00:00:00 2001 From: vladimirlaz Date: Wed, 16 
Sep 2020 13:39:29 +0300 Subject: [PATCH 413/465] [SYCL] Fix LIT regression after 9dd18ca8 (#2481) --- clang/test/Driver/sycl-intelfpga-static-lib-win.cpp | 4 ++-- clang/test/Driver/sycl-intelfpga-static-lib.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/Driver/sycl-intelfpga-static-lib-win.cpp b/clang/test/Driver/sycl-intelfpga-static-lib-win.cpp index 2f61c8d9567d4..1f55ecd8921ff 100644 --- a/clang/test/Driver/sycl-intelfpga-static-lib-win.cpp +++ b/clang/test/Driver/sycl-intelfpga-static-lib-win.cpp @@ -11,7 +11,7 @@ // RUN: lib -out:%t.lib %t1_bundle.obj /// Check phases with static lib -// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -fintelfpga %t.lib -ccc-print-phases 2>&1 \ +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -fno-sycl-device-lib=all -fintelfpga %t.lib -ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK_PHASES %s // CHECK_PHASES: 0: input, "[[INPUT:.+\.lib]]", object, (host-sycl) // CHECK_PHASES: 1: linker, {0}, image, (host-sycl) @@ -27,7 +27,7 @@ // CHECK_PHASES: 11: offload, "host-sycl (x86_64-pc-windows-msvc)" {1}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {10}, image /// Check for unbundle and use of deps in static lib -// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -fintelfpga %t.lib -### 2>&1 \ +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -fno-sycl-device-lib=all -fintelfpga %t.lib -### 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK_UNBUNDLE %s // CHECK_UNBUNDLE: clang-offload-bundler" "-type=aoo" "-targets=sycl-fpga_dep" "-inputs={{.*}}" "-outputs=[[DEPFILES:.+\.txt]]" "-unbundle" // CHECK_UNBUNDLE: aoc{{.*}} "-dep-files=@[[DEPFILES]]" diff --git a/clang/test/Driver/sycl-intelfpga-static-lib.cpp b/clang/test/Driver/sycl-intelfpga-static-lib.cpp index 510684ddaed7f..1a77afd98465a 100644 --- a/clang/test/Driver/sycl-intelfpga-static-lib.cpp +++ b/clang/test/Driver/sycl-intelfpga-static-lib.cpp @@ -10,7 +10,7 @@ // RUN: llvm-ar cr %t.a 
%t1_bundle.o /// Check phases with static lib -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fintelfpga %t.a -ccc-print-phases 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga %t.a -ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK_PHASES %s // CHECK_PHASES: 0: input, "[[INPUT:.+\.a]]", object, (host-sycl) // CHECK_PHASES: 1: linker, {0}, image, (host-sycl) @@ -27,7 +27,7 @@ // CHECK_PHASES: 12: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {11}, image /// Check for unbundle and use of deps in static lib -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fintelfpga %t.a -### 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga %t.a -### 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK_UNBUNDLE %s // CHECK_UNBUNDLE: clang-offload-bundler" "-type=aoo" "-targets=sycl-fpga_dep" "-inputs={{.*}}" "-outputs=[[DEPFILES:.+\.txt]]" "-unbundle" // CHECK_UNBUNDLE: aoc{{.*}} "-dep-files=@[[DEPFILES]]" From 129ee442826729bd7760f116802aefd3106a5625 Mon Sep 17 00:00:00 2001 From: smaslov-intel <48694368+smaslov-intel@users.noreply.github.com> Date: Wed, 16 Sep 2020 23:56:02 +0700 Subject: [PATCH 414/465] [SYCL]: basic support of contexts with multiple devices in Level-Zero (#2440) Signed-off-by: Sergey V Maslov --- sycl/plugins/level_zero/pi_level_zero.cpp | 208 ++++++++++++++-------- sycl/plugins/level_zero/pi_level_zero.hpp | 37 ++-- 2 files changed, 149 insertions(+), 96 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 46dff5d505dff..522778c66e223 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -12,6 +12,7 @@ /// \ingroup sycl_pi_level_zero #include "pi_level_zero.hpp" +#include #include #include #include @@ -219,9 +220,13 @@ 
_pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &ZePool, ZeEventPoolDesc.count = MaxNumEventsPerPool; ZeEventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - ze_device_handle_t ZeDevice = Device->ZeDevice; - if (ze_result_t ZeRes = zeEventPoolCreate(ZeContext, &ZeEventPoolDesc, 1, - &ZeDevice, &ZeEventPool)) + std::vector ZeDevices; + std::for_each(Devices.begin(), Devices.end(), + [&](pi_device &D) { ZeDevices.push_back(D->ZeDevice); }); + + if (ze_result_t ZeRes = + zeEventPoolCreate(ZeContext, &ZeEventPoolDesc, ZeDevices.size(), + &ZeDevices[0], &ZeEventPool)) return ZeRes; NumEventsAvailableInEventPool[ZeEventPool] = MaxNumEventsPerPool - 1; NumEventsLiveInEventPool[ZeEventPool] = MaxNumEventsPerPool; @@ -408,9 +413,9 @@ _pi_queue::resetCommandListFenceEntry(ze_command_list_handle_t ZeCommandList, ZE_CALL(zeFenceReset(this->ZeCommandListFenceMap[ZeCommandList])); ZE_CALL(zeCommandListReset(ZeCommandList)); if (MakeAvailable) { - this->Context->Device->ZeCommandListCacheMutex.lock(); - this->Context->Device->ZeCommandListCache.push_back(ZeCommandList); - this->Context->Device->ZeCommandListCacheMutex.unlock(); + this->Device->ZeCommandListCacheMutex.lock(); + this->Device->ZeCommandListCache.push_back(ZeCommandList); + this->Device->ZeCommandListCacheMutex.unlock(); } return PI_SUCCESS; @@ -433,7 +438,7 @@ _pi_device::getAvailableCommandList(pi_queue Queue, // Initally, we need to check if a command list has already been created // on this device that is available for use. If so, then reuse that - // L0 Command List and Fence for this PI call. + // Level-Zero Command List and Fence for this PI call. if (Queue->Device->ZeCommandListCache.size() > 0) { Queue->Device->ZeCommandListCacheMutex.lock(); *ZeCommandList = Queue->Device->ZeCommandListCache.front(); @@ -749,11 +754,25 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, assert(Platform); // Create PI platform from the given Level Zero driver handle. 
+ // TODO: get the platform from the platforms' cache. auto ZeDriver = pi_cast(NativeHandle); *Platform = new _pi_platform(ZeDriver); return PI_SUCCESS; } +// Get the cahched PI device created for the L0 device handle. +// Return NULL if no such PI device found. +pi_device _pi_platform::getDeviceFromNativeHandle(ze_device_handle_t ZeDevice) { + + std::lock_guard Lock(this->PiDevicesCacheMutex); + auto it = std::find_if(PiDevicesCache.begin(), PiDevicesCache.end(), + [&](pi_device &D) { return D->ZeDevice == ZeDevice; }); + if (it != PiDevicesCache.end()) { + return *it; + } + return nullptr; +} + pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 NumEntries, pi_device *Devices, pi_uint32 *NumDevices) { @@ -1391,6 +1410,7 @@ pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, assert(Platform); // Create PI device from the given Level Zero device handle. + // TODO: get the device from the devices' cache. auto ZeDevice = pi_cast(NativeHandle); *Device = new _pi_device(ZeDevice, Platform); return (*Device)->initialize(); @@ -1402,15 +1422,14 @@ pi_result piContextCreate(const pi_context_properties *Properties, const void *PrivateInfo, size_t CB, void *UserData), void *UserData, pi_context *RetContext) { - if (NumDevices != 1 || !Devices) { - zePrint("piCreateContext: context should have exactly one Device\n"); + if (!Devices) { return PI_INVALID_VALUE; } assert(RetContext); try { - *RetContext = new _pi_context(*Devices); + *RetContext = new _pi_context(NumDevices, Devices); } catch (const std::bad_alloc &) { return PI_OUT_OF_HOST_MEMORY; } catch (...) 
{ @@ -1444,9 +1463,10 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); switch (ParamName) { case PI_CONTEXT_INFO_DEVICES: - return ReturnValue(Context->Device); + return getInfoArray(Context->Devices.size(), ParamValueSize, ParamValue, + ParamValueSizeRet, &Context->Devices[0]); case PI_CONTEXT_INFO_NUM_DEVICES: - return ReturnValue(pi_uint32{1}); + return ReturnValue(pi_uint32(Context->Devices.size())); case PI_CONTEXT_INFO_REFERENCE_COUNT: return ReturnValue(pi_uint32{Context->RefCount}); default: @@ -1521,7 +1541,8 @@ pi_result piQueueCreate(pi_context Context, pi_device Device, if (!Context) { return PI_INVALID_CONTEXT; } - if (Context->Device != Device) { + if (std::find(Context->Devices.begin(), Context->Devices.end(), Device) == + Context->Devices.end()) { return PI_INVALID_DEVICE; } @@ -1628,7 +1649,11 @@ pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle, assert(Queue); auto ZeQueue = pi_cast(NativeHandle); - *Queue = new _pi_queue(ZeQueue, Context, Context->Device); + + // Attach the queue to the "0" device. + // TODO: see if we need to let user choose the device. 
+ pi_device Device = Context->Devices[0]; + *Queue = new _pi_queue(ZeQueue, Context, Device); return PI_SUCCESS; } @@ -1641,14 +1666,24 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, assert(RetMem); void *Ptr; - ze_device_handle_t ZeDevice = Context->Device->ZeDevice; - ze_device_mem_alloc_desc_t ZeDesc = {}; - ZeDesc.flags = 0; - ZeDesc.ordinal = 0; - ZE_CALL(zeMemAllocDevice(Context->ZeContext, &ZeDesc, Size, - 1, // TODO: alignment - ZeDevice, &Ptr)); + ze_device_mem_alloc_desc_t ZeDeviceMemDesc = {}; + ZeDeviceMemDesc.flags = 0; + ZeDeviceMemDesc.ordinal = 0; + + if (Context->Devices.size() == 1) { + ZE_CALL(zeMemAllocDevice(Context->ZeContext, &ZeDeviceMemDesc, Size, + 1, // TODO: alignment + Context->Devices[0]->ZeDevice, &Ptr)); + } else { + ze_host_mem_alloc_desc_t ZeHostMemDesc = {}; + ZeHostMemDesc.flags = 0; + ZE_CALL(zeMemAllocShared(Context->ZeContext, &ZeDeviceMemDesc, + &ZeHostMemDesc, Size, + 1, // TODO: alignment + nullptr, // not bound to any device + &Ptr)); + } if ((Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 || (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0) { @@ -1837,9 +1872,17 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, ZeImageDesc.arraylevels = pi_cast(ImageDesc->image_array_size); ZeImageDesc.miplevels = ImageDesc->num_mip_levels; + // Have the "0" device in context to own the image. Rely on Level-Zero + // drivers to perform migration as necessary for sharing it across multiple + // devices in the context. + // + // TODO: figure out if we instead need explicit copying for acessing + // the image from other devices in the context. + // + pi_device Device = Context->Devices[0]; ze_image_handle_t ZeHImage; - ZE_CALL(zeImageCreate(Context->ZeContext, Context->Device->ZeDevice, - &ZeImageDesc, &ZeHImage)); + ZE_CALL(zeImageCreate(Context->ZeContext, Device->ZeDevice, &ZeImageDesc, + &ZeHImage)); auto HostPtrOrNull = (Flags & PI_MEM_FLAGS_HOST_PTR_USE) ? 
pi_cast(HostPtr) : nullptr; @@ -1926,7 +1969,7 @@ pi_result piProgramCreateWithBinary(pi_context Context, pi_uint32 NumDevices, *BinaryStatus = PI_INVALID_VALUE; return PI_INVALID_VALUE; } - if (DeviceList[0] != Context->Device) + if (DeviceList[0] != Context->Devices[0]) return PI_INVALID_DEVICE; size_t Length = Lengths[0]; @@ -1975,10 +2018,11 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, case PI_PROGRAM_INFO_REFERENCE_COUNT: return ReturnValue(pi_uint32{Program->RefCount}); case PI_PROGRAM_INFO_NUM_DEVICES: - // Level Zero Module is always for a single device. + // TODO: return true number of devices this program exists for. return ReturnValue(pi_uint32{1}); case PI_PROGRAM_INFO_DEVICES: - return ReturnValue(Program->Context->Device); + // TODO: return all devices this program exists for. + return ReturnValue(Program->Context->Devices[0]); case PI_PROGRAM_INFO_BINARY_SIZES: { size_t SzBinary; if (Program->State == _pi_program::IL || @@ -2105,9 +2149,10 @@ pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, void (*PFnNotify)(pi_program Program, void *UserData), void *UserData, pi_program *RetProgram) { - // We only support one device with Level Zero. + // We only support one device with Level Zero currently. + pi_device Device = Context->Devices[0]; assert(NumDevices == 1); - assert(DeviceList && DeviceList[0] == Context->Device); + assert(DeviceList && DeviceList[0] == Device); assert(!PFnNotify && !UserData); // Validate input parameters. @@ -2170,9 +2215,8 @@ pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, // only export symbols. 
Guard.unlock(); ze_module_handle_t ZeModule; - pi_result res = - copyModule(Context->ZeContext, Context->Device->ZeDevice, - Input->ZeModule, &ZeModule); + pi_result res = copyModule(Context->ZeContext, Device->ZeDevice, + Input->ZeModule, &ZeModule); if (res != PI_SUCCESS) { return res; } @@ -2270,7 +2314,9 @@ static pi_result compileOrBuild(pi_program Program, pi_uint32 NumDevices, if ((NumDevices && !DeviceList) || (!NumDevices && DeviceList)) return PI_INVALID_VALUE; - // We only support one device with Level Zero. + // We only support build to one device with Level Zero now. + // TODO: we should eventually build to the possibly multiple root + // devices in the context. assert(NumDevices == 1 && DeviceList); // We should have either IL or native device code. @@ -2307,7 +2353,7 @@ static pi_result compileOrBuild(pi_program Program, pi_uint32 NumDevices, ZeModuleDesc.pBuildFlags = Options; ZeModuleDesc.pConstants = &ZeSpecConstants; - ze_device_handle_t ZeDevice = Program->Context->Device->ZeDevice; + ze_device_handle_t ZeDevice = DeviceList[0]->ZeDevice; ze_context_handle_t ZeContext = Program->Context->ZeContext; ze_module_handle_t ZeModule; ze_module_build_log_handle_t ZeBuildLog; @@ -2905,7 +2951,8 @@ pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { ze_event_handle_t ZeEvent; ze_event_desc_t ZeEventDesc = {}; // We have to set the SIGNAL & WAIT flags as HOST scope because the - // L0 plugin implementation waits for the events to complete on the host. + // Level-Zero plugin implementation waits for the events to complete + // on the host. ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; ZeEventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; ZeEventDesc.index = Index; @@ -3111,7 +3158,14 @@ pi_result piSamplerCreate(pi_context Context, assert(Context); assert(RetSampler); - ze_device_handle_t ZeDevice = Context->Device->ZeDevice; + // Have the "0" device in context to own the sampler. 
Rely on Level-Zero + // drivers to perform migration as necessary for sharing it across multiple + // devices in the context. + // + // TODO: figure out if we instead need explicit copying for acessing + // the sampler from other devices in the context. + // + pi_device Device = Context->Devices[0]; ze_sampler_handle_t ZeSampler; ze_sampler_desc_t ZeSamplerDesc = {}; @@ -3199,7 +3253,7 @@ pi_result piSamplerCreate(pi_context Context, } } - ZE_CALL(zeSamplerCreate(Context->ZeContext, ZeDevice, + ZE_CALL(zeSamplerCreate(Context->ZeContext, Device->ZeDevice, &ZeSamplerDesc, // TODO: translate properties &ZeSampler)); @@ -4241,49 +4295,44 @@ pi_result piextUSMFree(pi_context Context, void *Ptr) { ze_memory_allocation_properties_t ZeMemoryAllocationProperties = {}; // Query memory type of the pointer we're freeing to determine the correct - // way to do it(directly or via the allocator) + // way to do it(directly or via an allocator) ZE_CALL(zeMemGetAllocProperties( Context->ZeContext, Ptr, &ZeMemoryAllocationProperties, &ZeDeviceHandle)); - // TODO: when support for multiple devices is implemented, here - // we should do the following: - // - Find pi_device instance corresponding to ZeDeviceHandle we've just got if - // exist - // - Use that pi_device to find the right allocator context and free the - // pointer. - - // The allocation doesn't belong to any device for which USM allocator is - // enabled. - if (Context->Device->ZeDevice != ZeDeviceHandle) { - return USMFreeImpl(Context, Ptr); - } - - auto DeallocationHelper = - [Context, - Ptr](std::unordered_map &AllocContextMap) { - try { - auto It = AllocContextMap.find(Context->Device); - if (It == AllocContextMap.end()) - return PI_INVALID_VALUE; - - // The right context is found, deallocate the pointer - It->second.deallocate(Ptr); - } catch (const UsmAllocationException &Ex) { - return Ex.getError(); - } + if (ZeDeviceHandle) { + // All devices in the context are of the same platform. 
+ auto Platform = Context->Devices[0]->Platform; + auto Device = Platform->getDeviceFromNativeHandle(ZeDeviceHandle); + assert(Device); + + auto DeallocationHelper = + [Context, Device, + Ptr](std::unordered_map &AllocContextMap) { + try { + auto It = AllocContextMap.find(Device); + if (It == AllocContextMap.end()) + return PI_INVALID_VALUE; + + // The right context is found, deallocate the pointer + It->second.deallocate(Ptr); + } catch (const UsmAllocationException &Ex) { + return Ex.getError(); + } - return PI_SUCCESS; - }; + return PI_SUCCESS; + }; - switch (ZeMemoryAllocationProperties.type) { - case ZE_MEMORY_TYPE_SHARED: - return DeallocationHelper(Context->SharedMemAllocContexts); - case ZE_MEMORY_TYPE_DEVICE: - return DeallocationHelper(Context->DeviceMemAllocContexts); - default: - // Handled below - break; + switch (ZeMemoryAllocationProperties.type) { + case ZE_MEMORY_TYPE_SHARED: + return DeallocationHelper(Context->SharedMemAllocContexts); + case ZE_MEMORY_TYPE_DEVICE: + return DeallocationHelper(Context->DeviceMemAllocContexts); + default: + // Handled below + break; + } } + return USMFreeImpl(Context, Ptr); } @@ -4519,14 +4568,15 @@ pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, } return ReturnValue(MemAllocaType); } - case PI_MEM_ALLOC_DEVICE: { + case PI_MEM_ALLOC_DEVICE: if (ZeDeviceHandle) { - if (Context->Device->ZeDevice == ZeDeviceHandle) { - return ReturnValue(Context->Device); - } + // All devices in the context are of the same platform. + auto Platform = Context->Devices[0]->Platform; + auto Device = Platform->getDeviceFromNativeHandle(ZeDeviceHandle); + return Device ? 
ReturnValue(Device) : PI_INVALID_VALUE; + } else { + return PI_INVALID_VALUE; } - return PI_INVALID_VALUE; - } case PI_MEM_ALLOC_BASE_PTR: { void *Base; ZE_CALL(zeMemGetAddressRange(Context->ZeContext, Ptr, &Base, nullptr)); diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 682c9f3195a23..6d9d49f1de928 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -80,6 +80,8 @@ struct _pi_platform { // Cache pi_devices for reuse std::vector PiDevicesCache; std::mutex PiDevicesCacheMutex; + pi_device getDeviceFromNativeHandle(ze_device_handle_t); + // Maximum Number of Command Lists that can be created. // This Value is initialized to 20000, but can be changed by the user // thru the environment variable SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE @@ -185,29 +187,30 @@ struct _pi_device : _pi_object { }; struct _pi_context : _pi_object { - _pi_context(pi_device Device) - : Device{Device}, ZeCommandListInit{nullptr}, ZeEventPool{nullptr}, - NumEventsAvailableInEventPool{}, NumEventsLiveInEventPool{} { - // TODO: when support for multiple devices is added, here we should - // loop over all the devices and initialize allocator context for each - // pair (context, device) - SharedMemAllocContexts.emplace( - std::piecewise_construct, std::make_tuple(Device), - std::make_tuple(std::unique_ptr( - new USMSharedMemoryAlloc(this, Device)))); - DeviceMemAllocContexts.emplace( - std::piecewise_construct, std::make_tuple(Device), - std::make_tuple(std::unique_ptr( - new USMDeviceMemoryAlloc(this, Device)))); + _pi_context(pi_uint32 NumDevices, const pi_device *Devs) + : Devices{Devs, Devs + NumDevices}, ZeCommandListInit{nullptr}, + ZeEventPool{nullptr}, NumEventsAvailableInEventPool{}, + NumEventsLiveInEventPool{} { + // Create USM allocator context for each pair (device, context). 
+ for (uint32_t I = 0; I < NumDevices; I++) { + pi_device Device = Devs[I]; + SharedMemAllocContexts.emplace( + std::piecewise_construct, std::make_tuple(Device), + std::make_tuple(std::unique_ptr( + new USMSharedMemoryAlloc(this, Device)))); + DeviceMemAllocContexts.emplace( + std::piecewise_construct, std::make_tuple(Device), + std::make_tuple(std::unique_ptr( + new USMDeviceMemoryAlloc(this, Device)))); + } } // A L0 context handle is primarily used during creation and management of // resources that may be used by multiple devices. ze_context_handle_t ZeContext; - // Keep the device here (must be exactly one) to return it when PI context - // is queried for devices. - pi_device Device; + // Keep the PI devices this PI context was created for. + std::vector Devices; // Immediate Level Zero command list for the device in this context, to be // used for initializations. To be created as: From d31184e11fccae0b241df9c85e136f23dc2ad73c Mon Sep 17 00:00:00 2001 From: mdtoguchi <47896532+mdtoguchi@users.noreply.github.com> Date: Wed, 16 Sep 2020 15:30:56 -0700 Subject: [PATCH 415/465] [Driver][SYCL] Make /MD the default for -fsycl (#2478) When using -fsycl on Windows, make /MD the default behavior. Any usage of /MT will not be allowed and the driver will error upon usage. 
--- .../clang/Basic/DiagnosticDriverKinds.td | 1 + clang/lib/Driver/ToolChains/Clang.cpp | 20 +++++++++++++++---- clang/lib/Driver/ToolChains/MSVC.cpp | 3 +-- clang/test/Driver/sycl-MD-default.cpp | 16 +++++++++++++++ clang/test/Driver/sycl-offload.c | 3 +-- sycl/test/regression/msvc_crt.cpp | 2 -- 6 files changed, 35 insertions(+), 10 deletions(-) create mode 100644 clang/test/Driver/sycl-MD-default.cpp diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index d4e2fdf23a9e1..65af96ad246c2 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -132,6 +132,7 @@ def err_drv_invalid_Xsycl_frontend_with_args : Error< "invalid -Xsycl-target-frontend argument: '%0', options requiring arguments are unsupported">; def err_drv_bad_fpga_device_count : Error< "More than one FPGA specific device binary found in input objects">; +def err_drv_unsupported_opt_dpcpp : Error<"option '%0' unsupported with DPC++">; def err_drv_argument_only_allowed_with : Error< "invalid argument '%0' only allowed with '%1'">; def err_drv_argument_not_allowed_with : Error< diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9a3771abbc7d7..82aad7db78070 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6769,14 +6769,26 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, bool *EmitCodeView) const { unsigned RTOptionID = options::OPT__SLASH_MT; bool isNVPTX = getToolChain().getTriple().isNVPTX(); + bool isSYCL = + Args.hasArg(options::OPT_fsycl) || + getToolChain().getTriple().getEnvironment() == llvm::Triple::SYCLDevice; + // For SYCL Windows, /MD is the default. + if (isSYCL) + RTOptionID = options::OPT__SLASH_MD; if (Args.hasArg(options::OPT__SLASH_LDd)) - // The /LDd option implies /MTd. 
The dependent lib part can be overridden, - // but defining _DEBUG is sticky. - RTOptionID = options::OPT__SLASH_MTd; + // The /LDd option implies /MTd (/MDd for SYCL). The dependent lib part + // can be overridden but defining _DEBUG is sticky. + RTOptionID = isSYCL ? options::OPT__SLASH_MDd : options::OPT__SLASH_MTd; - if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group)) + if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group)) { RTOptionID = A->getOption().getID(); + if (isSYCL && (RTOptionID == options::OPT__SLASH_MT || + RTOptionID == options::OPT__SLASH_MTd)) + // Use of /MT or /MTd is not supported for SYCL. + getToolChain().getDriver().Diag(diag::err_drv_unsupported_opt_dpcpp) + << A->getOption().getName(); + } StringRef FlagForCRT; switch (RTOptionID) { diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 28ed44bb0e2d6..01f68e38d65b6 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -375,8 +375,7 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (!C.getDriver().IsCLMode() && !Args.hasArg(options::OPT_nostdlib) && Args.hasArg(options::OPT_fsycl) && !Args.hasArg(options::OPT_nolibsycl)) { - if (Args.hasArg(options::OPT__SLASH_MDd) || - Args.hasArg(options::OPT__SLASH_MTd)) + if (Args.hasArg(options::OPT__SLASH_MDd)) CmdArgs.push_back("-defaultlib:sycld.lib"); else CmdArgs.push_back("-defaultlib:sycl.lib"); diff --git a/clang/test/Driver/sycl-MD-default.cpp b/clang/test/Driver/sycl-MD-default.cpp new file mode 100644 index 0000000000000..b7ffd42f6a274 --- /dev/null +++ b/clang/test/Driver/sycl-MD-default.cpp @@ -0,0 +1,16 @@ +// REQUIRES: clang-driver + +// RUN: %clang_cl -### -fsycl -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DEFAULT %s +// RUN: %clang_cl -### -MD -fsycl -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DEFAULT %s +// RUN: %clang_cl -### -MDd -fsycl -c %s 2>&1 \ +// RUN: | FileCheck 
-check-prefix=CHK-DEFAULT %s +// CHK-DEFAULT: "-D_MT" "-D_DLL" +// CHK-DEFAULT: "--dependent-lib=msvcrt{{d*}}" + +// RUN: %clang_cl -### -MT -fsycl -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-ERROR %s +// RUN: %clang_cl -### -MTd -fsycl -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-ERROR %s +// CHK-ERROR: option 'MT{{d*}}' unsupported with DPC++ diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 5c8aad46b4c12..62bb6093a5949 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -589,9 +589,8 @@ // CHECK-LINK-NOLIBSYCL: "{{.*}}link{{(.exe)?}}" // CHECK-LINK-NOLIBSYCL-NOT: "-defaultlib:sycl.lib" -/// Check sycld.lib is chosen with /MDd and /MTd +/// Check sycld.lib is chosen with /MDd // RUN: %clang_cl -fsycl /MDd %s -o %t -### 2>&1 | FileCheck -check-prefix=CHECK-LINK-SYCL-DEBUG %s -// RUN: %clang_cl -fsycl /MTd %s -o %t -### 2>&1 | FileCheck -check-prefix=CHECK-LINK-SYCL-DEBUG %s // CHECK-LINK-SYCL-DEBUG: "--dependent-lib=sycld" // CHECK-LINK-SYCL-DEBUG-NOT: "-defaultlib:sycld.lib" diff --git a/sycl/test/regression/msvc_crt.cpp b/sycl/test/regression/msvc_crt.cpp index 78903b52ccdb0..e3022045ae10f 100644 --- a/sycl/test/regression/msvc_crt.cpp +++ b/sycl/test/regression/msvc_crt.cpp @@ -2,8 +2,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t1.exe // RUN: %clang_cl -fsycl /MDd -o %t2.exe %s // RUN: %CPU_RUN_PLACEHOLDER %t2.exe -// RUN: %clang_cl -fsycl /MT -o %t3.exe %s -// RUN: %CPU_RUN_PLACEHOLDER %t3.exe // REQUIRES: system-windows //==-------------- msvc_crt.cpp - SYCL MSVC CRT test -----------------------==// // From 14e227c4ebe569b7d43f29452a97ae2f6eed7e2c Mon Sep 17 00:00:00 2001 From: Byoungro So Date: Wed, 16 Sep 2020 21:36:33 -0700 Subject: [PATCH 416/465] [SYCL] Implement new env var SYCL_DEVICE_FILTER (#2239) * [SYCL] Implement new env var SYCL_DEVICE_TRIPLE This new env var takes a list of triples {device_type, backend, device_num} 1. 
This list means SYCL_RT will only use those specified devices. 2. This list also limits related plugins to be loaded by SYCL RT. This PR only implemented new env var and selective plugin loading (#2) Signed-off-by: Byoungro So --- sycl/doc/EnvironmentVariables.md | 5 +- sycl/include/CL/sycl/backend_types.hpp | 13 +- sycl/include/CL/sycl/detail/device_filter.hpp | 83 ++++++++++++ sycl/source/CMakeLists.txt | 1 + sycl/source/detail/config.def | 1 + sycl/source/detail/config.hpp | 31 +++++ sycl/source/detail/device_filter.cpp | 122 ++++++++++++++++++ sycl/source/detail/filter_selector_impl.cpp | 9 +- sycl/source/detail/filter_selector_impl.hpp | 11 +- sycl/source/detail/pi.cpp | 31 ++++- sycl/source/device_selector.cpp | 77 ++++++++++- sycl/test/filter_selector/select_device.cpp | 80 ++++++++++++ .../filter_selector/select_device_acc.cpp | 69 ++++++++++ .../filter_selector/select_device_cpu.cpp | 67 ++++++++++ .../filter_selector/select_device_cuda.cpp | 68 ++++++++++ .../select_device_level_zero.cpp | 68 ++++++++++ .../filter_selector/select_device_opencl.cpp | 58 +++++++++ 17 files changed, 766 insertions(+), 28 deletions(-) create mode 100644 sycl/include/CL/sycl/detail/device_filter.hpp create mode 100644 sycl/source/detail/device_filter.cpp create mode 100644 sycl/test/filter_selector/select_device.cpp create mode 100644 sycl/test/filter_selector/select_device_acc.cpp create mode 100644 sycl/test/filter_selector/select_device_cpu.cpp create mode 100644 sycl/test/filter_selector/select_device_cuda.cpp create mode 100644 sycl/test/filter_selector/select_device_level_zero.cpp create mode 100644 sycl/test/filter_selector/select_device_opencl.cpp diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 0a1dcd0a03198..79f8688a83001 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -12,8 +12,9 @@ subject to change. Do not rely on these variables in production code. 
| Environment variable | Values | Description | | -------------------- | ------ | ----------- | | SYCL_PI_TRACE | Described [below](#sycl_pi_trace-options) | Enable specified level of tracing for PI. | -| SYCL_BE | PI_OPENCL, PI_LEVEL_ZERO, PI_CUDA | Force SYCL RT to consider only devices of the specified backend during the device selection. | -| SYCL_DEVICE_TYPE | One of: CPU, GPU, ACC, HOST | Force SYCL to use the specified device type. If unset, default selection rules are applied. If set to any unlisted value, this control has no effect. If the requested device type is not found, a `cl::sycl::runtime_error` exception is thrown. If a non-default device selector is used, a device must satisfy both the selector and this control to be chosen. This control only has effect on devices created with a selector. | +| SYCL_BE | PI_OPENCL, PI_LEVEL_ZERO, PI_CUDA | Force SYCL RT to consider only devices of the specified backend during the device selection. We are planning to deprecate SYCL_BE environment variable in the future. The specific grace period is not decided yet. Please use the new env var SYCL_DEVICE_FILTER instead. | +| SYCL_DEVICE_TYPE | One of: CPU, GPU, ACC, HOST | Force SYCL to use the specified device type. If unset, default selection rules are applied. If set to any unlisted value, this control has no effect. If the requested device type is not found, a `cl::sycl::runtime_error` exception is thrown. If a non-default device selector is used, a device must satisfy both the selector and this control to be chosen. This control only has effect on devices created with a selector. We are planning to deprecate SYCL_DEVICE_TYPE environment variable in the future. The specific grace period is not decided yet. Please use the new env var SYCL_DEVICE_FILTER instead. | +| SYCL_DEVICE_FILTER (tentative name) | {backend:device_type:device_num} | Limits the SYCL RT to use only a subset of the system's devices. 
Setting this environment variable affects all of the device query functions and all of the device selectors. The value of this environment variable is a comma separated list of filters, where each filter is a triple of the form "backend:device_type:device_num" (without the quotes). Each element of the triple is optional, but each filter must have at least one value. Possible values of "backend" are "host", "level_zero", "opencl", "cuda", or "\*". Possible values of "device_type" are "host", "cpu", "gpu", "acc", or "\*". Device_num is an integer that indexes the enumeration of devices from the sycl::platform::get_device() call, where the first device in that enumeration has index zero. Assuming a filter has all three elements of the triple, it selects only those devices that come from the given backend, have the specified device type, AND have the given device index. If more than one filter is specified, the RT is restricted to the union of devices selected by all filters. The RT always includes the "host" backend and the host device regardless of the filter because the SYCL language requires this device to always be present. Therefore, including "host" in the list of filters is allowed but is unnecessary. Note that the standard selectors like gpu_selector or cpu_selector will throw an exception if the filtered list of devices does not include a device that satisfies the selector. In particular, limiting the devices to only those supported by the "level_zero" backend will cause the cpu_selector to throw an exception since that backend does not support any CPU devices. This environment variable can be used to limit loading only specified plugins into the SYCL RT. | | SYCL_PROGRAM_COMPILE_OPTIONS | String of valid OpenCL compile options | Override compile options for all programs. | | SYCL_PROGRAM_LINK_OPTIONS | String of valid OpenCL link options | Override link options for all programs. 
| | SYCL_USE_KERNEL_SPV | Path to the SPIR-V binary | Load device image from the specified file. If runtime is unable to read the file, `cl::sycl::runtime_error` exception is thrown.| diff --git a/sycl/include/CL/sycl/backend_types.hpp b/sycl/include/CL/sycl/backend_types.hpp index 5cf9ab922124f..80885a47c0b0c 100644 --- a/sycl/include/CL/sycl/backend_types.hpp +++ b/sycl/include/CL/sycl/backend_types.hpp @@ -18,23 +18,26 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -enum class backend : char { host, opencl, level_zero, cuda }; +enum class backend : char { host, opencl, level_zero, cuda, all }; template struct interop; inline std::ostream &operator<<(std::ostream &Out, backend be) { switch (be) { case backend::host: - Out << std::string("host"); + Out << "host"; break; case backend::opencl: - Out << std::string("opencl"); + Out << "opencl"; break; case backend::level_zero: - Out << std::string("level_zero"); + Out << "level_zero"; break; case backend::cuda: - Out << std::string("cuda"); + Out << "cuda"; + break; + case backend::all: + Out << "all"; } return Out; } diff --git a/sycl/include/CL/sycl/detail/device_filter.hpp b/sycl/include/CL/sycl/detail/device_filter.hpp new file mode 100644 index 0000000000000..b65cf709d9dc0 --- /dev/null +++ b/sycl/include/CL/sycl/detail/device_filter.hpp @@ -0,0 +1,83 @@ +//==---------- device_filter.hpp - SYCL device filter descriptor -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +struct device_filter { + backend Backend = backend::all; + info::device_type DeviceType = info::device_type::all; + int DeviceNum = 0; + bool HasBackend = false; + bool HasDeviceType = false; + bool HasDeviceNum = false; + int MatchesSeen = 0; + + device_filter(){}; + device_filter(const std::string &FilterString); + friend std::ostream &operator<<(std::ostream &Out, + const device_filter &Filter); +}; + +class device_filter_list { + std::vector FilterList; + +public: + device_filter_list() {} + device_filter_list(const std::string &FilterString); + device_filter_list(device_filter &Filter); + void addFilter(device_filter &Filter); + std::vector &get() { return FilterList; } + friend std::ostream &operator<<(std::ostream &Out, + const device_filter_list &List); +}; + +inline std::ostream &operator<<(std::ostream &Out, + const device_filter &Filter) { + Out << Filter.Backend << ":"; + if (Filter.DeviceType == info::device_type::host) { + Out << "host"; + } else if (Filter.DeviceType == info::device_type::cpu) { + Out << "cpu"; + } else if (Filter.DeviceType == info::device_type::gpu) { + Out << "gpu"; + } else if (Filter.DeviceType == info::device_type::accelerator) { + Out << "accelerator"; + } else if (Filter.DeviceType == info::device_type::all) { + Out << "*"; + } else { + Out << "unknown"; + } + if (Filter.HasDeviceNum) { + Out << ":" << Filter.DeviceNum; + } + return Out; +} + +inline std::ostream &operator<<(std::ostream &Out, + const device_filter_list &List) { + for (const device_filter &Filter : List.FilterList) { + Out << Filter; + Out << ","; + } + return Out; +} + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git 
a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 7c70f770d876a..fd2c69d59494d 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -108,6 +108,7 @@ set(SYCL_SOURCES "detail/config.cpp" "detail/context_impl.cpp" "detail/device_binary_image.cpp" + "detail/device_filter.cpp" "detail/device_impl.cpp" "detail/error_handling/enqueue_kernel.cpp" "detail/event_impl.cpp" diff --git a/sycl/source/detail/config.def b/sycl/source/detail/config.def index 846c7a3b1f13e..3f097c3796c25 100644 --- a/sycl/source/detail/config.def +++ b/sycl/source/detail/config.def @@ -16,3 +16,4 @@ CONFIG(SYCL_DEVICE_ALLOWLIST, 1024, __SYCL_DEVICE_ALLOWLIST) CONFIG(SYCL_BE, 16, __SYCL_BE) CONFIG(SYCL_PI_TRACE, 16, __SYCL_PI_TRACE) CONFIG(SYCL_DEVICELIB_NO_FALLBACK, 1, __SYCL_DEVICELIB_NO_FALLBACK) +CONFIG(SYCL_DEVICE_FILTER, 1024, __SYCL_DEVICE_FILTER) diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index ac6fe8fbcbd2b..4f1b54126ed72 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -10,7 +10,9 @@ #include #include +#include #include +#include #include #include @@ -163,6 +165,35 @@ template <> class SYCLConfig { } }; +template <> class SYCLConfig { + using BaseT = SYCLConfigBase; + +public: + static device_filter_list *get() { + static bool Initialized = false; + static device_filter_list *FilterList = nullptr; + + // Configuration parameters are processed only once, like reading a string + // from environment and converting it into a typed object. + if (Initialized) { + return FilterList; + } + + const char *ValStr = BaseT::getRawValue(); + if (ValStr) { + static device_filter_list DFL{ValStr}; + FilterList = &DFL; + } + // As mentioned above, configuration parameters are processed only once. + // If multiple threads are checking this env var at the same time, + // they will end up setting the configration to the same value. 
+ // If other threads check after one thread already set configration, + // the threads will get the same value as the first thread. + Initialized = true; + return FilterList; + } +}; + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/device_filter.cpp b/sycl/source/detail/device_filter.cpp new file mode 100644 index 0000000000000..4b2c2df525268 --- /dev/null +++ b/sycl/source/detail/device_filter.cpp @@ -0,0 +1,122 @@ +//==------------------- device_filter.cpp ----------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +device_filter::device_filter(const std::string &FilterString) { + const std::array, 5> + SyclDeviceTypeMap = {{{"host", info::device_type::host}, + {"cpu", info::device_type::cpu}, + {"gpu", info::device_type::gpu}, + {"acc", info::device_type::accelerator}, + {"*", info::device_type::all}}}; + const std::array, 5> SyclBeMap = { + {{"host", backend::host}, + {"opencl", backend::opencl}, + {"level_zero", backend::level_zero}, + {"cuda", backend::cuda}, + {"*", backend::all}}}; + + size_t Cursor = 0; + size_t ColonPos = 0; + auto findElement = [&](auto Element) { + size_t Found = FilterString.find(Element.first, Cursor); + if (Found == std::string::npos) + return false; + Cursor = Found; + return true; + }; + auto selectElement = [&](auto It, auto Map, auto EltIfNotFound) { + if (It == Map.end()) + return EltIfNotFound; + ColonPos = FilterString.find(":", Cursor); + if (ColonPos != std::string::npos) + Cursor = ColonPos + 1; + else + Cursor = Cursor + It->first.size(); + return It->second; + }; + + // 
Handle the optional 1st field of the filter, backend + // Check if the first entry matches with a known backend type + auto It = + std::find_if(std::begin(SyclBeMap), std::end(SyclBeMap), findElement); + // If no match is found, set the backend type backend::all + // which actually means 'any backend' will be a match. + Backend = selectElement(It, SyclBeMap, backend::all); + + // Handle the optional 2nd field of the filter - device type. + // Check if the 2nd entry matches with any known device type. + if (Cursor >= FilterString.size()) { + DeviceType = info::device_type::all; + } else { + auto Iter = std::find_if(std::begin(SyclDeviceTypeMap), + std::end(SyclDeviceTypeMap), findElement); + // If no match is found, set device_type 'all', + // which actually means 'any device_type' will be a match. + DeviceType = selectElement(Iter, SyclDeviceTypeMap, info::device_type::all); + } + + // Handle the optional 3rd field of the filter, device number + // Try to convert the remaining string to an integer. + // If succeessful, the converted integer is the desired device num. + if (Cursor < FilterString.size()) { + try { + DeviceNum = stoi(FilterString.substr(ColonPos + 1)); + HasDeviceNum = true; + } catch (...) { + std::string Message = + std::string("Invalid device filter: ") + FilterString + + "\nPossible backend values are {host,opencl,level_zero,cuda,*}.\n" + "Possible device types are {host,cpu,gpu,acc,*}.\n" + "Device number should be an non-negative integer.\n"; + throw cl::sycl::invalid_parameter_error(Message, PI_INVALID_VALUE); + } + } +} + +device_filter_list::device_filter_list(const std::string &FilterStr) { + // First, change the string in all lowercase. + // This means we allow the user to use both uppercase and lowercase strings. + std::string FilterString = FilterStr; + std::transform(FilterString.begin(), FilterString.end(), FilterString.begin(), + ::tolower); + // SYCL_DEVICE_FILTER can set multiple filters separated by commas. 
+ // convert each filter triple string into an istance of device_filter class. + size_t Pos = 0; + while (Pos < FilterString.size()) { + size_t CommaPos = FilterString.find(",", Pos); + if (CommaPos == std::string::npos) { + CommaPos = FilterString.size(); + } + std::string SubString = FilterString.substr(Pos, CommaPos - Pos); + FilterList.push_back(device_filter(SubString)); + Pos = CommaPos + 1; + } +} + +device_filter_list::device_filter_list(device_filter &Filter) { + FilterList.push_back(Filter); +} + +void device_filter_list::addFilter(device_filter &Filter) { + FilterList.push_back(Filter); +} + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/filter_selector_impl.cpp b/sycl/source/detail/filter_selector_impl.cpp index 58d85cf6e7388..b062c62cfe075 100644 --- a/sycl/source/detail/filter_selector_impl.cpp +++ b/sycl/source/detail/filter_selector_impl.cpp @@ -62,13 +62,13 @@ filter create_filter(const std::string &Input) { for (const std::string &Token : Tokens) { if (Token == "cpu" && !Result.HasDeviceType) { - Result.DeviceType = PI_DEVICE_TYPE_CPU; + Result.DeviceType = info::device_type::cpu; Result.HasDeviceType = true; } else if (Token == "gpu" && !Result.HasDeviceType) { - Result.DeviceType = PI_DEVICE_TYPE_GPU; + Result.DeviceType = info::device_type::gpu; Result.HasDeviceType = true; } else if (Token == "accelerator" && !Result.HasDeviceType) { - Result.DeviceType = PI_DEVICE_TYPE_ACC; + Result.DeviceType = info::device_type::accelerator; Result.HasDeviceType = true; } else if (Token == "opencl" && !Result.HasBackend) { Result.Backend = backend::opencl; @@ -134,8 +134,7 @@ int filter_selector_impl::operator()(const device &Dev) const { BackendOK = (BE == Filter.Backend); } if (Filter.HasDeviceType) { - RT::PiDeviceType DT = - sycl::detail::getSyclObjImpl(Dev)->get_device_type(); + info::device_type DT = Dev.get_info(); DeviceTypeOK = (DT == Filter.DeviceType); } if (Filter.HasDeviceNum) { 
diff --git a/sycl/source/detail/filter_selector_impl.hpp b/sycl/source/detail/filter_selector_impl.hpp index a4cde25f7c66c..99392fbfa2564 100644 --- a/sycl/source/detail/filter_selector_impl.hpp +++ b/sycl/source/detail/filter_selector_impl.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include @@ -21,15 +22,7 @@ class device; namespace ONEAPI { namespace detail { -struct filter { - backend Backend = backend::host; - RT::PiDeviceType DeviceType = PI_DEVICE_TYPE_ALL; - int DeviceNum = 0; - bool HasBackend = false; - bool HasDeviceType = false; - bool HasDeviceNum = false; - int MatchesSeen = 0; -}; +typedef struct sycl::detail::device_filter filter; class filter_selector_impl { public: diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 578728e389ee8..598994783d897 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -14,6 +14,7 @@ #include "context_impl.hpp" #include #include +#include #include #include #include @@ -214,9 +215,33 @@ bool findPlugins(vector_class> &PluginNames) { // search is done for libpi_opencl.so/pi_opencl.dll file in LD_LIBRARY_PATH // env only. 
// - PluginNames.emplace_back(OPENCL_PLUGIN_NAME, backend::opencl); - PluginNames.emplace_back(LEVEL_ZERO_PLUGIN_NAME, backend::level_zero); - PluginNames.emplace_back(CUDA_PLUGIN_NAME, backend::cuda); + device_filter_list *FilterList = SYCLConfig::get(); + if (!FilterList) { + PluginNames.emplace_back(OPENCL_PLUGIN_NAME, backend::opencl); + PluginNames.emplace_back(LEVEL_ZERO_PLUGIN_NAME, backend::level_zero); + PluginNames.emplace_back(CUDA_PLUGIN_NAME, backend::cuda); + } else { + std::vector Filters = FilterList->get(); + bool OpenCLFound = false; + bool LevelZeroFound = false; + bool CudaFound = false; + for (const device_filter &Filter : Filters) { + backend Backend = Filter.Backend; + if (!OpenCLFound && + (Backend == backend::opencl || Backend == backend::all)) { + PluginNames.emplace_back(OPENCL_PLUGIN_NAME, backend::opencl); + OpenCLFound = true; + } else if (!LevelZeroFound && + (Backend == backend::level_zero || Backend == backend::all)) { + PluginNames.emplace_back(LEVEL_ZERO_PLUGIN_NAME, backend::level_zero); + LevelZeroFound = true; + } else if (!CudaFound && + (Backend == backend::cuda || Backend == backend::all)) { + PluginNames.emplace_back(CUDA_PLUGIN_NAME, backend::cuda); + CudaFound = true; + } + } + } return true; } diff --git a/sycl/source/device_selector.cpp b/sycl/source/device_selector.cpp index f0f9b23792599..8c664c3b98f7a 100644 --- a/sycl/source/device_selector.cpp +++ b/sycl/source/device_selector.cpp @@ -8,10 +8,12 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -34,6 +36,31 @@ static bool isDeviceOfPreferredSyclBe(const device &Device) { backend::level_zero; } +// Return true if the given device 'Dev' matches with any filter +static bool isForcedDevice(const device &Dev, int Index = -1) { + detail::device_filter_list *FilterList = + detail::SYCLConfig::get(); + + if (!FilterList) + return false; + info::device_type Type = Dev.get_info(); + backend Backend; + if (Type == 
info::device_type::host) + Backend = backend::host; + else + Backend = detail::getSyclObjImpl(Dev)->getPlugin().getBackend(); + + for (const detail::device_filter &Filter : FilterList->get()) { + if ((Filter.Backend == Backend || Filter.Backend == backend::all) && + (Filter.DeviceType == Type || + Filter.DeviceType == info::device_type::all)) { + if (Index < 0 || (Filter.HasDeviceNum && Filter.DeviceNum == Index)) + return true; + } + } + return false; +} + device device_selector::select_device() const { vector_class devices = device::get_devices(); int score = REJECT_DEVICE_SCORE; @@ -59,6 +86,13 @@ device device_selector::select_device() const { if (dev_score < 0) continue; + // If SYCL_DEVICE_FILTER is set, give a bonus point for the device + // whose index matches with desired device number. + int index = &dev - &devices[0]; + if (isForcedDevice(dev, index)) { + dev_score += 1000; + } + // SYCL spec says: "If more than one device receives the high score then // one of those tied devices will be returned, but which of the devices // from the tied set is to be returned is not defined". Here we give a @@ -103,7 +137,12 @@ int default_selector::operator()(const device &dev) const { Score = 50; // override always wins - if (dev.get_info() == detail::get_forced_type()) + // filter device gets a high point. + if (isForcedDevice(dev)) + Score += 1000; + + else if (dev.get_info() == + detail::get_forced_type()) Score += 1000; if (dev.is_gpu()) @@ -122,7 +161,16 @@ int gpu_selector::operator()(const device &dev) const { int Score = REJECT_DEVICE_SCORE; if (dev.is_gpu()) { - Score = 1000; + detail::device_filter_list *FilterList = + detail::SYCLConfig::get(); + if (FilterList) { + if (isForcedDevice(dev)) + Score = 1000; + else + return Score; + } else { + Score = 1000; + } // Give preference to device of SYCL BE. 
if (isDeviceOfPreferredSyclBe(dev)) Score += 50; @@ -132,8 +180,18 @@ int gpu_selector::operator()(const device &dev) const { int cpu_selector::operator()(const device &dev) const { int Score = REJECT_DEVICE_SCORE; + if (dev.is_cpu()) { - Score = 1000; + detail::device_filter_list *FilterList = + detail::SYCLConfig::get(); + if (FilterList) { + if (isForcedDevice(dev)) + Score = 1000; + else + return Score; + } else { + Score = 1000; + } // Give preference to device of SYCL BE. if (isDeviceOfPreferredSyclBe(dev)) Score += 50; @@ -143,8 +201,18 @@ int cpu_selector::operator()(const device &dev) const { int accelerator_selector::operator()(const device &dev) const { int Score = REJECT_DEVICE_SCORE; + if (dev.is_accelerator()) { - Score = 1000; + detail::device_filter_list *FilterList = + detail::SYCLConfig::get(); + if (FilterList) { + if (isForcedDevice(dev)) + Score = 1000; + else + return Score; + } else { + Score = 1000; + } // Give preference to device of SYCL BE. if (isDeviceOfPreferredSyclBe(dev)) Score += 50; @@ -154,6 +222,7 @@ int accelerator_selector::operator()(const device &dev) const { int host_selector::operator()(const device &dev) const { int Score = REJECT_DEVICE_SCORE; + if (dev.is_host()) { Score = 1000; // Give preference to device of SYCL BE. 
diff --git a/sycl/test/filter_selector/select_device.cpp b/sycl/test/filter_selector/select_device.cpp new file mode 100644 index 0000000000000..1e9ccb8b03e4f --- /dev/null +++ b/sycl/test/filter_selector/select_device.cpp @@ -0,0 +1,80 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RU: env SYCL_DEVICE_FILTER="*" %t.out +// RU: env SYCL_DEVICE_FILTER=cpu %t.out +// RU: env SYCL_DEVICE_FILTER=level_zero:gpu %t.out +// RU: env SYCL_DEVICE_FILTER=opencl:gpu %t.out +// RUN: env SYCL_DEVICE_FILTER=cpu,level_zero:gpu %t.out +// RUN: env SYCL_DEVICE_FILTER=opencl:acc:0 %t.out +// +// Checks if only specified device types can be acquired from select_device +// when SYCL_DEVICE_FILTER is set +// Checks that no device is selected when no device of desired type is +// available. +// +// REQUIRES: opencl,level_zero,host,cpu,gpu,accelerator + +#include +#include + +using namespace cl::sycl; +using namespace std; + +int main() { + const char *envVal = std::getenv("SYCL_DEVICE_FILTER"); + std::string forcedPIs; + if (envVal) { + std::cout << "SYCL_DEVICE_FILTER=" << envVal << std::endl; + forcedPIs = envVal; + } + if (!envVal || forcedPIs == "*" || + forcedPIs.find("level_zero:gpu") != std::string::npos) { + default_selector ds; + device d = ds.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("Level-Zero") != string::npos); + std::cout << "Level-zero GPU Device is found: " << std::boolalpha + << d.is_gpu() << std::endl; + } + if (envVal && forcedPIs != "*" && + forcedPIs.find("opencl:gpu") != std::string::npos) { + gpu_selector gs; + device d = gs.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("OpenCL") != string::npos); + std::cout << "OpenCL GPU Device is found: " << std::boolalpha << d.is_gpu() + << std::endl; + } + if (!envVal || forcedPIs == "*" || + forcedPIs.find("cpu") != std::string::npos) { + cpu_selector cs; + device d = cs.select_device(); + std::cout << "CPU device is found: " 
<< d.is_cpu() << std::endl; + } + // HOST device is always available regardless of SYCL_DEVICE_FILTER + { + host_selector hs; + device d = hs.select_device(); + std::cout << "HOST device is found: " << d.is_host() << std::endl; + } + if (!envVal || forcedPIs == "*" || + forcedPIs.find("acc") != std::string::npos) { + accelerator_selector as; + device d = as.select_device(); + std::cout << "ACC device is found: " << d.is_accelerator() << std::endl; + } + if (envVal && (forcedPIs.find("cpu") == std::string::npos && + forcedPIs.find("opencl") == std::string::npos && + forcedPIs.find("*") == std::string::npos)) { + try { + cpu_selector cs; + device d = cs.select_device(); + } catch (...) { + std::cout << "Expectedly, CPU device is not found." << std::endl; + return 0; // expected + } + std::cerr << "Error: CPU device is found" << std::endl; + return -1; + } + + return 0; +} diff --git a/sycl/test/filter_selector/select_device_acc.cpp b/sycl/test/filter_selector/select_device_acc.cpp new file mode 100644 index 0000000000000..c7d933e4b5e26 --- /dev/null +++ b/sycl/test/filter_selector/select_device_acc.cpp @@ -0,0 +1,69 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RU: env SYCL_DEVICE_FILTER=acc %t.out +// +// Checks if only specified device types can be acquired from select_device +// when SYCL_DEVICE_FILTER is set +// Checks that no device is selected when no device of desired type is +// available. 
+// +// REQUIRES: opencl,host,accelerator + +#include +#include + +using namespace cl::sycl; +using namespace std; + +int main() { + const char *envVal = std::getenv("SYCL_DEVICE_FILTER"); + std::string forcedPIs; + if (envVal) { + std::cout << "SYCL_DEVICE_FILTER=" << envVal << std::endl; + forcedPIs = envVal; + } + { + default_selector ds; + device d = ds.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("OpenCL") != string::npos); + std::cout << "ACC Device is found: " << std::boolalpha << d.is_accelerator() + << std::endl; + } + { + gpu_selector gs; + try { + device d = gs.select_device(); + std::cerr << "GPU Device is found in error: " << std::boolalpha + << d.is_gpu() << std::endl; + return -1; + } catch (...) { + std::cout << "Expectedly, GPU device is not found." << std::endl; + } + } + { + cpu_selector cs; + try { + device d = cs.select_device(); + std::cerr << "CPU Device is found in error: " << std::boolalpha + << d.is_cpu() << std::endl; + return -1; + } catch (...) { + std::cout << "Expectedly, CPU device not is found." 
<< std::endl; + } + } + // HOST device is always available regardless of SYCL_DEVICE_FILTER + { + host_selector hs; + device d = hs.select_device(); + std::cout << "HOST device is found: " << d.is_host() << std::endl; + } + { + accelerator_selector as; + device d = as.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("OpenCL") != string::npos); + std::cout << "ACC device is found: " << d.is_accelerator() << std::endl; + } + + return 0; +} diff --git a/sycl/test/filter_selector/select_device_cpu.cpp b/sycl/test/filter_selector/select_device_cpu.cpp new file mode 100644 index 0000000000000..e16b850018e22 --- /dev/null +++ b/sycl/test/filter_selector/select_device_cpu.cpp @@ -0,0 +1,67 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RU: env SYCL_DEVICE_FILTER=cpu %t.out +// +// Checks if only specified device types can be acquired from select_device +// when SYCL_DEVICE_FILTER is set +// Checks that no device is selected when no device of desired type is +// available. +// +// REQUIRES: opencl,host,cpu + +#include +#include + +using namespace cl::sycl; +using namespace std; + +int main() { + const char *envVal = std::getenv("SYCL_DEVICE_FILTER"); + std::string forcedPIs; + if (envVal) { + std::cout << "SYCL_DEVICE_FILTER=" << envVal << std::endl; + forcedPIs = envVal; + } + { + default_selector ds; + device d = ds.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("OpenCL") != string::npos); + std::cout << "CPU Device is found: " << std::boolalpha << d.is_cpu() + << std::endl; + } + { + gpu_selector gs; + try { + device d = gs.select_device(); + std::cerr << "GPU Device is found: " << std::boolalpha << d.is_gpu() + << std::endl; + return -1; + } catch (...) { + std::cout << "Expectedly, GPU device is not found." 
<< std::endl; + } + } + { + cpu_selector cs; + device d = cs.select_device(); + std::cout << "CPU device is found: " << d.is_cpu() << std::endl; + } + // HOST device is always available regardless of SYCL_DEVICE_FILTER + { + host_selector hs; + device d = hs.select_device(); + std::cout << "HOST device is found: " << d.is_host() << std::endl; + } + { + accelerator_selector as; + try { + device d = as.select_device(); + std::cerr << "ACC device is found in error: " << d.is_accelerator() + << std::endl; + return -1; + } catch (...) { + std::cout << "Expectedly, ACC device is not found." << std::endl; + } + } + + return 0; +} diff --git a/sycl/test/filter_selector/select_device_cuda.cpp b/sycl/test/filter_selector/select_device_cuda.cpp new file mode 100644 index 0000000000000..a8b8ae15901e6 --- /dev/null +++ b/sycl/test/filter_selector/select_device_cuda.cpp @@ -0,0 +1,68 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_FILTER=cuda:gpu %t.out +// +// Checks if only specified device types can be acquired from select_device +// when SYCL_DEVICE_FILTER is set. +// Checks that no device is selected when no device of desired type is +// available. 
+// +// REQUIRES: cuda,host,gpu + +#include +#include + +using namespace cl::sycl; +using namespace std; + +int main() { + const char *envVal = getenv("SYCL_DEVICE_FILTER"); + string forcedPIs; + if (envVal) { + cout << "SYCL_DEVICE_FILTER=" << envVal << std::endl; + forcedPIs = envVal; + } + + { + default_selector ds; + device d = ds.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("CUDA") != string::npos); + cout << "CUDA GPU Device is found: " << boolalpha << d.is_gpu() + << std::endl; + } + { + gpu_selector gs; + device d = gs.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("CUDA") != string::npos); + cout << name << " is found: " << boolalpha << d.is_gpu() << std::endl; + } + { + cpu_selector cs; + try { + device d = cs.select_device(); + cerr << "CPU device is found in error: " << d.is_cpu() << std::endl; + return -1; + } catch (...) { + cout << "Expectedly, cpu device is not found." << std::endl; + } + } + // HOST device is always available regardless of SYCL_DEVICE_FILTER + { + host_selector hs; + device d = hs.select_device(); + cout << "HOST device is found: " << d.is_host() << std::endl; + } + { + accelerator_selector as; + try { + device d = as.select_device(); + cerr << "ACC device is found in error: " << d.is_accelerator() + << std::endl; + } catch (...) { + cout << "Expectedly, ACC device is not found." 
<< std::endl; + } + } + + return 0; +} diff --git a/sycl/test/filter_selector/select_device_level_zero.cpp b/sycl/test/filter_selector/select_device_level_zero.cpp new file mode 100644 index 0000000000000..c808945889949 --- /dev/null +++ b/sycl/test/filter_selector/select_device_level_zero.cpp @@ -0,0 +1,68 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_FILTER=level_zero:gpu %t.out +// +// Checks if only specified device types can be acquired from select_device +// when SYCL_DEVICE_FILTER is set +// Checks that no device is selected when no device of desired type is +// available. +// +// REQUIRES: level_zero,host,gpu + +#include +#include + +using namespace cl::sycl; +using namespace std; + +int main() { + const char *envVal = getenv("SYCL_DEVICE_FILTER"); + string forcedPIs; + if (envVal) { + cout << "SYCL_DEVICE_FILTER=" << envVal << std::endl; + forcedPIs = envVal; + } + + { + default_selector ds; + device d = ds.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("Level-Zero") != string::npos); + cout << "Level-Zero GPU Device is found: " << boolalpha << d.is_gpu() + << std::endl; + } + { + gpu_selector gs; + device d = gs.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("Level-Zero") != string::npos); + cout << name << " is found: " << boolalpha << d.is_gpu() << std::endl; + } + { + cpu_selector cs; + try { + device d = cs.select_device(); + cerr << "CPU device is found in error: " << d.is_cpu() << std::endl; + return -1; + } catch (...) { + cout << "Expectedly, cpu device is not found." 
<< std::endl; + } + } + // HOST device is always available regardless of SYCL_DEVICE_FILTER + { + host_selector hs; + device d = hs.select_device(); + cout << "HOST device is found: " << d.is_host() << std::endl; + } + { + accelerator_selector as; + try { + device d = as.select_device(); + cerr << "ACC device is found in error: " << d.is_accelerator() + << std::endl; + } catch (...) { + cout << "Expectedly, ACC device is not found." << std::endl; + } + } + + return 0; +} diff --git a/sycl/test/filter_selector/select_device_opencl.cpp b/sycl/test/filter_selector/select_device_opencl.cpp new file mode 100644 index 0000000000000..4629564ff1fb0 --- /dev/null +++ b/sycl/test/filter_selector/select_device_opencl.cpp @@ -0,0 +1,58 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_FILTER=opencl %t.out +// +// Checks if only specified device types can be acquired from select_device +// when SYCL_DEVICE_FILTER is set +// Checks that no device is selected when no device of desired type is +// available. 
+// +// REQUIRES: opencl,host,gpu,cpu,accelerator + +#include +#include + +using namespace cl::sycl; +using namespace std; + +int main() { + const char *envVal = getenv("SYCL_DEVICE_FILTER"); + string forcedPIs; + if (envVal) { + cout << "SYCL_DEVICE_FILTER=" << envVal << std::endl; + forcedPIs = envVal; + } + + { + default_selector ds; + device d = ds.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("OpenCL") != string::npos); + cout << "OpenCL GPU Device is found: " << boolalpha << d.is_gpu() + << std::endl; + } + { + gpu_selector gs; + device d = gs.select_device(); + string name = d.get_platform().get_info(); + assert(name.find("OpenCL") != string::npos); + cout << name << " is found: " << boolalpha << d.is_gpu() << std::endl; + } + { + cpu_selector cs; + device d = cs.select_device(); + cout << "CPU device is found : " << d.is_cpu() << std::endl; + } + // HOST device is always available regardless of SYCL_DEVICE_FILTER + { + host_selector hs; + device d = hs.select_device(); + cout << "HOST device is found: " << d.is_host() << std::endl; + } + { + accelerator_selector as; + device d = as.select_device(); + cout << "ACC device is found : " << d.is_accelerator() << std::endl; + } + + return 0; +} From 2bb0cf78a1d66041b834cc8eccec3d749c782534 Mon Sep 17 00:00:00 2001 From: mdtoguchi <47896532+mdtoguchi@users.noreply.github.com> Date: Thu, 17 Sep 2020 02:45:36 -0700 Subject: [PATCH 417/465] [Driver][SYCL] Improve -std setting behaviors for Windows (#2461) When using -std:arg on Windows, the std setting was not being applied to the device compilation. Make needed adjustements to allow this which is due to the -std option differences in regards to Windows. 
--- clang/lib/Driver/ToolChains/Clang.cpp | 19 ++++++++++--------- clang/test/Driver/sycl-offload.c | 4 +++- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 82aad7db78070..0d0b0573e12d6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4056,10 +4056,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // device toolchain. bool UseSYCLTriple = IsSYCLDevice && (!IsSYCL || IsSYCLOffloadDevice); - // Adjust IsWindowsXYZ for CUDA/HIP compilations. Even when compiling in + // Adjust IsWindowsXYZ for CUDA/HIP/SYCL compilations. Even when compiling in // device mode (i.e., getToolchain().getTriple() is NVPTX/AMDGCN, not // Windows), we need to pass Windows-specific flags to cc1. - if (IsCuda || IsHIP) + if (IsCuda || IsHIP || IsSYCL) IsWindowsMSVC |= AuxTriple && AuxTriple->isWindowsMSVCEnvironment(); // C++ is not supported for IAMCU. @@ -5174,7 +5174,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, /*Joined=*/true); else if (IsWindowsMSVC) ImplyVCPPCXXVer = true; - else if (IsSYCL) + + if (IsSYCL && types::isCXX(InputType) && + !Args.hasArg(options::OPT__SLASH_std)) // For DPC++, we default to -std=c++17 for all compilations. Use of -std // on the command line will override. CmdArgs.push_back("-std=c++17"); @@ -5758,12 +5760,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } if (LanguageStandard.empty()) { - if (IsMSVC2015Compatible) - if (IsSYCL) - // For DPC++, C++17 is the default. - LanguageStandard = "-std=c++17"; - else - LanguageStandard = "-std=c++14"; + if (IsSYCL) + // For DPC++, C++17 is the default. 
+ LanguageStandard = "-std=c++17"; + else if (IsMSVC2015Compatible) + LanguageStandard = "-std=c++14"; else LanguageStandard = "-std=c++11"; } diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 62bb6093a5949..bd8087bb3151d 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -898,7 +898,9 @@ // -std=c++17 override check // RUN: %clangxx -### -c -fsycl -std=c++14 -xc++ %s 2>&1 | FileCheck -check-prefix=CHECK-STD-OVR %s // RUN: %clang_cl -### -c -fsycl /std:c++14 -TP %s 2>&1 | FileCheck -check-prefix=CHECK-STD-OVR %s -// CHECK-STD-OVR: clang{{.*}} "-std=c++14" +// CHECK-STD-OVR: clang{{.*}} "-emit-llvm-bc" {{.*}} "-std=c++14" +// CHECK-STD-OVR: clang{{.*}} "-fsyntax-only" {{.*}} "-std=c++14" +// CHECK-STD-OVR: clang{{.*}} "-emit-obj" {{.*}} "-std=c++14" // CHECK-STD-OVR-NOT: clang{{.*}} "-std=c++17" // TODO: SYCL specific fail - analyze and enable From cb3a5d714be676bd7d73f7feefeff61ec1a53797 Mon Sep 17 00:00:00 2001 From: mdtoguchi <47896532+mdtoguchi@users.noreply.github.com> Date: Thu, 17 Sep 2020 02:47:27 -0700 Subject: [PATCH 418/465] [Driver][SYCL][FPGA] Improve host object retention for fsycl-link (#2431) When compiling for -fsycl-link=early to -fsycl-link=image, the host object was lost in the shuffle. We want that to stick around. We do this by wrapping the host object before putting it into the device archive. When needed in subsequent compilations, we unbundle the host object. 
--- clang/lib/Driver/Driver.cpp | 23 ++++++++-- clang/lib/Driver/ToolChains/Clang.cpp | 24 +++++++++++ clang/test/Driver/sycl-offload-intelfpga.cpp | 44 ++++++++++++-------- 3 files changed, 71 insertions(+), 20 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 9946362e8ac17..9d400a945ba25 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5138,14 +5138,33 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // For an FPGA archive, we add the unbundling step above to take care of // the device side, but also unbundle here to extract the host side - for (const auto &LI : LinkerInputs) { + bool EarlyLink = false; + if (const Arg *A = Args.getLastArg(options::OPT_fsycl_link_EQ)) + EarlyLink = A->getValue() == StringRef("early"); + for (auto &LI : LinkerInputs) { Action *UnbundlerInput = nullptr; + auto wrapObject = [&] { + if (EarlyLink && Args.hasArg(options::OPT_fintelfpga)) { + // Only wrap the object with -fsycl-link=early + auto *BC = C.MakeAction(LI, types::TY_LLVM_BC); + auto *ASM = C.MakeAction(BC, types::TY_PP_Asm); + LI = C.MakeAction(ASM, types::TY_Object); + } + }; if (auto *IA = dyn_cast(LI)) { if (IA->getType() == types::TY_FPGA_AOCR || IA->getType() == types::TY_FPGA_AOCX) { // Add to unbundler. 
UnbundlerInput = LI; + } else { + std::string FileName = IA->getInputArg().getAsString(Args); + if ((IA->getType() == types::TY_Object && !isObjectFile(FileName)) || + IA->getInputArg().getOption().hasFlag(options::LinkerInput)) + continue; + wrapObject(); } + } else { + wrapObject(); } if (UnbundlerInput && !PL.empty()) { if (auto *IA = dyn_cast(UnbundlerInput)) { @@ -6159,8 +6178,6 @@ InputInfo Driver::BuildJobsForActionNoCache( OffloadingPrefix += "-wrapper"; if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) BaseInput = FinalOutput->getValue(); - else - BaseInput = getDefaultImageName(); } } Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0d0b0573e12d6..6424e58c9df28 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7699,6 +7699,30 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, assert(JA.getInputs().size() == Inputs.size() && "Not have inputs for all dependence actions??"); + // For FPGA, we wrap the host objects before archiving them when using + // -fsycl-link. This allows for better extraction control from the + // archive when we need the host objects for subsequent compilations. + if (OffloadingKind == Action::OFK_None && + C.getArgs().hasArg(options::OPT_fintelfpga) && + C.getArgs().hasArg(options::OPT_fsycl_link_EQ)) { + + // Add offload targets and inputs. + CmdArgs.push_back(C.getArgs().MakeArgString( + Twine("-kind=") + Action::GetOffloadKindName(OffloadingKind))); + CmdArgs.push_back( + TCArgs.MakeArgString(Twine("-target=") + Triple.getTriple())); + + // Add input. 
+ assert(Inputs[0].isFilename() && "Invalid input."); + CmdArgs.push_back(TCArgs.MakeArgString(Inputs[0].getFilename())); + + C.addCommand(std::make_unique( + JA, *this, ResponseFileSupport::None(), + TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), + CmdArgs, Inputs)); + return; + } + // Add offload targets and inputs. for (unsigned I = 0; I < Inputs.size(); ++I) { // Get input's Offload Kind and ToolChain. diff --git a/clang/test/Driver/sycl-offload-intelfpga.cpp b/clang/test/Driver/sycl-offload-intelfpga.cpp index c42e60aacf07a..4f64388540916 100644 --- a/clang/test/Driver/sycl-offload-intelfpga.cpp +++ b/clang/test/Driver/sycl-offload-intelfpga.cpp @@ -35,7 +35,12 @@ // CHK-FPGA-LINK: llvm-spirv{{.*}} "-o" "[[OUTPUT3:.+\.spv]]" "-spirv-max-version=1.1" "-spirv-debug-info-version=legacy" "-spirv-ext=+all,-SPV_INTEL_usm_storage_classes" "[[OUTPUT2]]" // CHK-FPGA-EARLY: aoc{{.*}} "-o" "[[OUTPUT4:.+\.aocr]]" "[[OUTPUT3]]" "-sycl" "-rtl" // CHK-FPGA-IMAGE: aoc{{.*}} "-o" "[[OUTPUT5:.+\.aocx]]" "[[OUTPUT3]]" "-sycl" -// CHK-FPGA-LINK: llvm-ar{{.*}} "cr" "libfoo.a" "[[INPUT]]" +// CHK-FPGA-LINK: clang-offload-wrapper{{.*}} "-o=[[WRAPOUT:.+\.bc]]" "-host=x86_64-unknown-linux-gnu" {{.*}} "-kind=sycl" +// CHK-FPGA-LINK: llc{{.*}} "-o" "[[OBJOUTDEV:.+\.o]]" "[[WRAPOUT]]" +// CHK-FPGA-EARLY: clang-offload-wrapper{{.*}} "-host" "x86_64-unknown-linux-gnu" "-o" "[[WRAPOUTHOST:.+\.bc]]" "-kind=host" +// CHK-FPGA-EARLY: clang{{.*}} "-o" "[[OBJOUT:.+\.o]]" {{.*}} "[[WRAPOUTHOST]]" +// CHK-FPGA-EARLY: llvm-ar{{.*}} "cr" "libfoo.a" "[[OBJOUT]]" "[[OBJOUTDEV]]" +// CHK-FPGA-IMAGE: llvm-ar{{.*}} "cr" "libfoo.a" "[[INPUT]]" "[[OBJOUTDEV]]" // Output designation should not be used for unbundling step // RUN: touch %t.o @@ -60,7 +65,11 @@ // CHK-FPGA-LINK-WIN: sycl-post-link{{.*}} "-ir-output-only" "-spec-const=default" "-o" "[[OUTPUT2:.+\.bc]]" "[[OUTPUT2_1]]" // CHK-FPGA-LINK-WIN: llvm-spirv{{.*}} "-o" "[[OUTPUT3:.+\.spv]]" "-spirv-max-version=1.1" 
"-spirv-debug-info-version=legacy" "-spirv-ext=+all,-SPV_INTEL_usm_storage_classes" "[[OUTPUT2]]" // CHK-FPGA-LINK-WIN: aoc{{.*}} "-o" "[[OUTPUT5:.+\.aocr]]" "[[OUTPUT3]]" "-sycl" "-rtl" -// CHK-FPGA-LINK-WIN: lib.exe{{.*}} "[[INPUT]]" {{.*}} "-OUT:libfoo.lib" +// CHK-FPGA-LINK-WIN: clang-offload-wrapper{{.*}} "-o=[[WRAPOUT:.+\.bc]]" {{.*}} "-kind=sycl" +// CHK-FPGA-LINK-WIN: llc{{.*}} "-o" "[[OBJOUTDEV:.+\.obj]]" "[[WRAPOUT]]" +// CHK-FPGA-LINK-WIN: clang-offload-wrapper{{.*}} "-o" "[[WRAPOUTHOST:.+\.bc]]" "-kind=host" +// CHK-FPGA-LINK-WIN: clang{{.*}} "-o" "[[OBJOUT:.+\.obj]]" {{.*}} "[[WRAPOUTHOST]]" +// CHK-FPGA-LINK-WIN: lib.exe{{.*}} "[[OBJOUT]]" "[[OBJOUTDEV]]" {{.*}} "-OUT:libfoo.lib" /// Check -fintelfpga -fsycl-link with an FPGA archive // Create the dummy archive @@ -83,7 +92,7 @@ // CHK-FPGA-LINK-LIB-EARLY: clang-offload-wrapper{{.*}} "-host=x86_64-unknown-linux-gnu" "-target=fpga_aocr-intel-unknown-sycldevice" "-kind=sycl" "[[OUTPUT4]]" // CHK-FPGA-LINK-LIB: llc{{.*}} "-filetype=obj" "-o" "[[OUTPUT5:.+\.o]]" // CHK-FPGA-LINK-LIB: clang-offload-bundler{{.*}} "-type=aoo" "-targets=host-x86_64-unknown-linux-gnu" "-inputs=[[INPUT]]" "-outputs=[[OUTPUT1:.+\.txt]]" "-unbundle" -// CHK-FPGA-LINK-LIB: llvm-ar{{.*}} "cr" {{.*}} "@[[OUTPUT1]]" +// CHK-FPGA-LINK-LIB-IMAGE: llvm-ar{{.*}} "cr" {{.*}} "@[[OUTPUT1]]" /// Check the warning's emission for -fsycl-link's appending behavior // RUN: touch dummy.a @@ -186,28 +195,29 @@ /// -fintelfpga -fsycl-link from source // RUN: touch %t.cpp // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -fsycl-link=early %t.cpp -ccc-print-phases 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK-SRC,CHK-FPGA-LINK-SRC-DEFAULT %s -// RUN: %clang_cl -### -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -fsycl-link=early %t.cpp -ccc-print-phases 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK-SRC,CHK-FPGA-LINK-SRC-CL %s +// RUN: | FileCheck 
-check-prefixes=CHK-FPGA-LINK-SRC %s +// RUN: %clang_cl -### --target=x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga -fsycl-link=early %t.cpp -ccc-print-phases 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK-SRC %s // CHK-FPGA-LINK-SRC: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl) // CHK-FPGA-LINK-SRC: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) // CHK-FPGA-LINK-SRC: 2: input, "[[INPUT]]", c++, (device-sycl) // CHK-FPGA-LINK-SRC: 3: preprocessor, {2}, c++-cpp-output, (device-sycl) // CHK-FPGA-LINK-SRC: 4: compiler, {3}, sycl-header, (device-sycl) -// CHK-FPGA-LINK-SRC-DEFAULT: 5: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {4}, c++-cpp-output -// CHK-FPGA-LINK-SRC-CL: 5: offload, "host-sycl (x86_64-pc-windows-msvc)" {1}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {4}, c++-cpp-output +// CHK-FPGA-LINK-SRC: 5: offload, "host-sycl (x86_64-unknown-linux-gnu)" {1}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {4}, c++-cpp-output // CHK-FPGA-LINK-SRC: 6: compiler, {5}, ir, (host-sycl) // CHK-FPGA-LINK-SRC: 7: backend, {6}, assembler, (host-sycl) // CHK-FPGA-LINK-SRC: 8: assembler, {7}, object, (host-sycl) -// CHK-FPGA-LINK-SRC: 9: linker, {8}, archive, (host-sycl) -// CHK-FPGA-LINK-SRC: 10: compiler, {3}, ir, (device-sycl) -// CHK-FPGA-LINK-SRC: 11: linker, {10}, ir, (device-sycl) -// CHK-FPGA-LINK-SRC: 12: sycl-post-link, {11}, ir, (device-sycl) -// CHK-FPGA-LINK-SRC: 13: llvm-spirv, {12}, spirv, (device-sycl) -// CHK-FPGA-LINK-SRC: 14: backend-compiler, {13}, fpga_aocr, (device-sycl) -// CHK-FPGA-LINK-SRC: 15: clang-offload-wrapper, {14}, object, (device-sycl) -// CHK-FPGA-LINK-SRC-DEFAULT: 16: offload, "host-sycl (x86_64-unknown-linux-gnu)" {9}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {15}, archive -// CHK-FPGA-LINK-SRC-CL: 16: offload, "host-sycl (x86_64-pc-windows-msvc)" {9}, "device-sycl 
(spir64_fpga-unknown-unknown-sycldevice)" {15}, archive +// CHK-FPGA-LINK-SRC: 9: clang-offload-wrapper, {8}, ir, (host-sycl) +// CHK-FPGA-LINK-SRC: 10: backend, {9}, assembler, (host-sycl) +// CHK-FPGA-LINK-SRC: 11: assembler, {10}, object, (host-sycl) +// CHK-FPGA-LINK-SRC: 12: linker, {11}, archive, (host-sycl) +// CHK-FPGA-LINK-SRC: 13: compiler, {3}, ir, (device-sycl) +// CHK-FPGA-LINK-SRC: 14: linker, {13}, ir, (device-sycl) +// CHK-FPGA-LINK-SRC: 15: sycl-post-link, {14}, ir, (device-sycl) +// CHK-FPGA-LINK-SRC: 16: llvm-spirv, {15}, spirv, (device-sycl) +// CHK-FPGA-LINK-SRC: 17: backend-compiler, {16}, fpga_aocr, (device-sycl) +// CHK-FPGA-LINK-SRC: 18: clang-offload-wrapper, {17}, object, (device-sycl) +// CHK-FPGA-LINK-SRC: 19: offload, "host-sycl (x86_64-unknown-linux-gnu)" {12}, "device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {18}, archive /// -fintelfpga with -reuse-exe= // RUN: touch %t.cpp From 702642c9b517e48962aff7fe9ca4867b3009712a Mon Sep 17 00:00:00 2001 From: Dmitri Mokhov Date: Thu, 17 Sep 2020 09:15:57 -0500 Subject: [PATCH 419/465] [SYCL] Fix OpenCL version check when verifying SPIR-V online compilation support (#2445) Enumerate through historic OpenCL versions, like in the plugin. --- .../program_manager/program_manager.cpp | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 89d23d65937be..d37a6db7c862d 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -218,19 +218,20 @@ getOrBuild(KernelProgramCache &KPCache, KeyT &&CacheKey, AcquireFT &&Acquire, } } +// TODO replace this with a new PI API function static bool isDeviceBinaryTypeSupported(const context &C, RT::PiDeviceBinaryType Format) { + // All formats except PI_DEVICE_BINARY_TYPE_SPIRV are supported. 
+ if (Format != PI_DEVICE_BINARY_TYPE_SPIRV) + return true; + const backend ContextBackend = detail::getSyclObjImpl(C)->getPlugin().getBackend(); // The CUDA backend cannot use SPIR-V - if (ContextBackend == backend::cuda && Format == PI_DEVICE_BINARY_TYPE_SPIRV) + if (ContextBackend == backend::cuda) return false; - // All formats except PI_DEVICE_BINARY_TYPE_SPIRV are supported. - if (Format != PI_DEVICE_BINARY_TYPE_SPIRV) - return true; - vector_class Devices = C.get_devices(); // Program type is SPIR-V, so we need a device compiler to do JIT. @@ -240,9 +241,14 @@ static bool isDeviceBinaryTypeSupported(const context &C, } // OpenCL 2.1 and greater require clCreateProgramWithIL - if ((ContextBackend == backend::opencl) && - C.get_platform().get_info() >= "2.1") - return true; + if (ContextBackend == backend::opencl) { + std::string ver = C.get_platform().get_info(); + if (ver.find("OpenCL 1.0") == std::string::npos && + ver.find("OpenCL 1.1") == std::string::npos && + ver.find("OpenCL 1.2") == std::string::npos && + ver.find("OpenCL 2.0") == std::string::npos) + return true; + } for (const device &D : Devices) { // We need cl_khr_il_program extension to be present From 53d909e28c09b7986efbcf6d1560399f09071919 Mon Sep 17 00:00:00 2001 From: kbobrovs Date: Thu, 17 Sep 2020 16:30:59 -0700 Subject: [PATCH 420/465] [SYCL] Make spec_constant default constructor public and available on host. 
(#2488) This is what the implemented spec prescribes: https://github.com/codeplaysoftware/standards-proposals/blob/master/spec-constant/index.md Signed-off-by: Konstantin S Bobrovsky --- .../ONEAPI/experimental/spec_constant.hpp | 9 ++-- sycl/test/spec_const/spec_const_hw.cpp | 51 +++++++++++++++++-- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp b/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp index bd8f4f9a3fb57..79190bcc20b99 100644 --- a/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp +++ b/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp @@ -32,17 +32,14 @@ class spec_const_error : public compile_program_error { }; template class spec_constant { -private: - // Implementation defined constructor. -#ifdef __SYCL_DEVICE_ONLY__ public: spec_constant() {} private: -#else - spec_constant(T Cst) : Val(Cst) {} -#endif #ifndef __SYCL_DEVICE_ONLY__ + // Implementation defined constructor. + spec_constant(T Cst) : Val(Cst) {} + T Val; #endif friend class cl::sycl::program; diff --git a/sycl/test/spec_const/spec_const_hw.cpp b/sycl/test/spec_const/spec_const_hw.cpp index bc6800a942e85..28c83957e19c8 100644 --- a/sycl/test/spec_const/spec_const_hw.cpp +++ b/sycl/test/spec_const/spec_const_hw.cpp @@ -23,16 +23,17 @@ class MyInt32Const; class MyFloatConst; +class MyConst; using namespace sycl; class KernelAAAi; class KernelBBBf; -int val = 10; +int global_val = 10; // Fetch a value at runtime. 
-int get_value() { return val; } +int get_value() { return global_val; } float foo( const cl::sycl::ONEAPI::experimental::spec_constant @@ -49,8 +50,22 @@ struct SCWrapper { cl::sycl::ONEAPI::experimental::spec_constant SC2; }; +// MyKernel is used to test default constructor +using AccT = sycl::accessor; +using ScT = sycl::ONEAPI::experimental::spec_constant; + +struct MyKernel { + MyKernel(AccT &Acc) : Acc(Acc) {} + + void setConst(ScT Sc) { this->Sc = Sc; } + + void operator()() const { Acc[0] = Sc.get(); } + AccT Acc; + ScT Sc; +}; + int main(int argc, char **argv) { - val = argc + 16; + global_val = argc + 16; cl::sycl::queue q(default_selector{}, [](exception_list l) { for (auto ep : l) { @@ -68,10 +83,11 @@ int main(int argc, char **argv) { std::cout << "Running on " << q.get_device().get_info() << "\n"; - std::cout << "val = " << val << "\n"; + std::cout << "global_val = " << global_val << "\n"; cl::sycl::program program1(q.get_context()); cl::sycl::program program2(q.get_context()); cl::sycl::program program3(q.get_context()); + cl::sycl::program program4(q.get_context()); int goldi = (int)get_value(); // TODO make this floating point once supported by the compiler @@ -83,6 +99,9 @@ int main(int argc, char **argv) { cl::sycl::ONEAPI::experimental::spec_constant f32 = program2.set_spec_constant(goldf); + cl::sycl::ONEAPI::experimental::spec_constant sc = + program4.set_spec_constant(goldi); + program1.build_with_kernel_type(); // Use an option (does not matter which exactly) to test different internal // SYCL RT execution path @@ -90,15 +109,20 @@ int main(int argc, char **argv) { SCWrapper W(program3); program3.build_with_kernel_type(); + + program4.build_with_kernel_type(); + int goldw = 6; std::vector veci(1); std::vector vecf(1); std::vector vecw(1); + std::vector vec(1); try { cl::sycl::buffer bufi(veci.data(), veci.size()); cl::sycl::buffer buff(vecf.data(), vecf.size()); cl::sycl::buffer bufw(vecw.data(), vecw.size()); + cl::sycl::buffer 
buf(vec.data(), vec.size()); q.submit([&](cl::sycl::handler &cgh) { auto acci = bufi.get_access(cgh); @@ -123,6 +147,19 @@ int main(int argc, char **argv) { program3.get_kernel(), [=]() { accw[0] = W.SC1.get() + W.SC2.get(); }); }); + // Check spec_constant default construction with subsequent initialization + q.submit([&](cl::sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + // Specialization constants specification says: + // cl::sycl::experimental::spec_constant is default constructible, + // although the object is not considered initialized until the result of + // the call to cl::sycl::program::set_spec_constant is assigned to it. + MyKernel Kernel(acc); // default construct inside MyKernel instance + Kernel.setConst(sc); // initialize to sc, returned by set_spec_constant + + cgh.single_task(program4.get_kernel(), Kernel); + }); + } catch (cl::sycl::exception &e) { std::cout << "*** Exception caught: " << e.what() << "\n"; return 1; @@ -146,6 +183,12 @@ int main(int argc, char **argv) { std::cout << "*** ERROR: " << valw << " != " << goldw << "(gold)\n"; passed = false; } + int val = vec[0]; + + if (val != goldi) { + std::cout << "*** ERROR: " << val << " != " << goldi << "(gold)\n"; + passed = false; + } std::cout << (passed ? "passed\n" : "FAILED\n"); return passed ? 
0 : 1; } From f7d073d1c853a10c93acb0376e6f8ca726aa4058 Mon Sep 17 00:00:00 2001 From: Alexander Batashev Date: Fri, 18 Sep 2020 12:28:48 +0300 Subject: [PATCH 421/465] [SYCL] Implement SYCL_ONEAPI_accessor_properties (#2456) Signed-off-by: Alexander Batashev --- .../CL/sycl/ONEAPI/accessor_property_list.hpp | 232 +++++++++ .../CL/sycl/ONEAPI/atomic_accessor.hpp | 7 +- sycl/include/CL/sycl/ONEAPI/reduction.hpp | 6 +- sycl/include/CL/sycl/accessor.hpp | 480 ++++++++++++++++-- sycl/include/CL/sycl/buffer.hpp | 29 +- sycl/include/CL/sycl/detail/accessor_impl.hpp | 4 + sycl/include/CL/sycl/detail/buffer_impl.hpp | 3 +- sycl/include/CL/sycl/detail/image_impl.hpp | 3 +- .../CL/sycl/detail/property_list_base.hpp | 113 +++++ sycl/include/CL/sycl/handler.hpp | 12 +- sycl/include/CL/sycl/image.hpp | 14 +- sycl/include/CL/sycl/interop_handler.hpp | 3 +- sycl/include/CL/sycl/multi_ptr.hpp | 88 ++-- .../sycl/properties/accessor_properties.hpp | 73 +++ sycl/include/CL/sycl/property_list.hpp | 87 +--- .../CL/sycl/property_list_conversion.hpp | 23 + sycl/include/CL/sycl/types.hpp | 18 +- sycl/test/abi/layout_accessors.cpp | 18 +- .../accessor/accessor_property_list_ct.cpp | 94 ++++ .../accessor/accessor_property_list_rt.cpp | 25 + sycl/test/fpga_tests/buffer_location.cpp | 22 + .../fpga_tests/buffer_location_codegen.cpp | 47 ++ 22 files changed, 1208 insertions(+), 193 deletions(-) create mode 100644 sycl/include/CL/sycl/ONEAPI/accessor_property_list.hpp create mode 100644 sycl/include/CL/sycl/detail/property_list_base.hpp create mode 100644 sycl/include/CL/sycl/property_list_conversion.hpp create mode 100644 sycl/test/basic_tests/accessor/accessor_property_list_ct.cpp create mode 100644 sycl/test/basic_tests/accessor/accessor_property_list_rt.cpp create mode 100644 sycl/test/fpga_tests/buffer_location.cpp create mode 100644 sycl/test/fpga_tests/buffer_location_codegen.cpp diff --git a/sycl/include/CL/sycl/ONEAPI/accessor_property_list.hpp 
b/sycl/include/CL/sycl/ONEAPI/accessor_property_list.hpp new file mode 100644 index 0000000000000..0e96138d6a08a --- /dev/null +++ b/sycl/include/CL/sycl/ONEAPI/accessor_property_list.hpp @@ -0,0 +1,232 @@ +//==----- accessor_property_list.hpp --- SYCL accessor property list -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +// Forward declaration +template +class accessor; +namespace detail { +// This helper template must be specialized for nested instance template +// of each compile-time-constant property. +template struct IsCompileTimePropertyInstance : std::false_type {}; +} // namespace detail +namespace ONEAPI { + +template struct is_compile_time_property : std::false_type {}; + +/// Objects of the accessor_property_list class are containers for the SYCL +/// properties. +/// +/// Unlike \c property_list, accessor_property_list can take +/// compile-time-constant properties. +/// +/// \sa accessor +/// \sa property_list +/// +/// \ingroup sycl_api +template +class accessor_property_list : protected sycl::detail::PropertyListBase { + // These structures check if compile-time-constant property is present in + // list. For runtime properties this check is always true. + template struct AreSameTemplate : std::is_same {}; + template