Skip to content

Commit

Permalink
[X86] combineVectorHADDSUB - remove the broken HOP(x,x) merging code …
Browse files Browse the repository at this point in the history
…(PR51974)

The intention of this code turns out to be superfluous, as we can handle this with shuffle combining, and it has a critical flaw in that it doesn't check for dependencies.

Fixes PR51974
  • Loading branch information
RKSimon committed Sep 27, 2021
1 parent 4b581e8 commit 468ff70
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 23 deletions.
23 changes: 0 additions & 23 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45014,32 +45014,9 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
"Unexpected horizontal add/sub opcode");

if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
// For slow-hop targets, if we have a hop with a single op, see if we already
// have another user that we can reuse and shuffle the result.
MVT VT = N->getSimpleValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (VT.is128BitVector() && LHS == RHS) {
for (SDNode *User : LHS->uses()) {
if (User != N && User->getOpcode() == N->getOpcode()) {
MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) {
return DAG.getBitcast(
VT,
DAG.getVectorShuffle(ShufVT, SDLoc(N),
DAG.getBitcast(ShufVT, SDValue(User, 0)),
DAG.getUNDEF(ShufVT), {0, 1, 0, 1}));
}
if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) {
return DAG.getBitcast(
VT,
DAG.getVectorShuffle(ShufVT, SDLoc(N),
DAG.getBitcast(ShufVT, SDValue(User, 0)),
DAG.getUNDEF(ShufVT), {2, 3, 2, 3}));
}
}
}
}

// HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y)).
if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/X86/horizontal-shuffle-2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,25 @@ define <4 x float> @test_unpacklo_hadd_v4f32_unary(<4 x float> %0) {
ret <4 x float> %3
}

; Regression test for PR51974: two chained phaddw calls where the first is
; phadd(%a0, %a0) and the second consumes that result together with %a0, so
; the second horizontal add depends on the first while sharing its operand.
define <8 x i16> @PR51974(<8 x i16> %a0) {
; SSE-LABEL: PR51974:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: phaddw %xmm0, %xmm1
; SSE-NEXT: phaddw %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: PR51974:
; AVX: ## %bb.0:
; AVX-NEXT: vphaddw %xmm0, %xmm0, %xmm1
; AVX-NEXT: vphaddw %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%r0 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a0)
%r1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %r0, <8 x i16> %a0)
ret <8 x i16> %r1
}

declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
Expand Down

0 comments on commit 468ff70

Please sign in to comment.