Skip to content

Commit

Permalink
[AArch64][GlobalISel] Legalize G_SHUFFLE_VECTOR for Odd-Sized Vectors (
Browse files Browse the repository at this point in the history
…#83038)

Legalize Smaller/Larger than legal vectors with i8 and i16 element
sizes.
Vectors with elements smaller than i8 will get widened to i8 elements.
  • Loading branch information
chuongg3 authored Feb 29, 2024
1 parent 5a0bd2a commit a344db7
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 108 deletions.
12 changes: 12 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,18 @@ class LegalizeRuleSet {
LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
}

/// Widen the scalar or vector element type to the next power of two that is
/// at least MinSize. No effect if the scalar size is a power of two.
LegalizeRuleSet &widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx,
unsigned MinSize = 0) {
using namespace LegalityPredicates;
return actionIf(
LegalizeAction::WidenScalar,
any(scalarOrEltNarrowerThan(TypeIdx, MinSize),
scalarOrEltSizeNotPow2(typeIdx(TypeIdx))),
LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
}

LegalizeRuleSet &narrowScalar(unsigned TypeIdx, LegalizeMutation Mutation) {
using namespace LegalityPredicates;
return actionIf(LegalizeAction::NarrowScalar, isScalar(typeIdx(TypeIdx)),
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2495,6 +2495,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
case TargetOpcode::G_SUB:
case TargetOpcode::G_SHUFFLE_VECTOR:
// Perform operation at larger width (any extension is fines here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
},
changeTo(1, 0))
.moreElementsToNextPow2(0)
.widenScalarOrEltToNextPow2OrMinSize(0, 8)
.clampNumElements(0, v8s8, v16s8)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.moreElementsIf(
Expand Down
72 changes: 40 additions & 32 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
Original file line number Diff line number Diff line change
Expand Up @@ -287,39 +287,47 @@ body: |
; CHECK-NEXT: %q0:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: %q1:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %q2:_(<4 x s32>) = COPY $q2
; CHECK-NEXT: %vec_cond0:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1
; CHECK-NEXT: %vec_cond1:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %w0(s32), [[C]]
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %cmp(s1)
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ZEXT]], 1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[SEXT_INREG]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s1>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C2]](s64)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s1>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s1>), [[DEF]], shufflemask(0, 0, 0, 0)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[C3]](s8)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s1>) = G_BUILD_VECTOR [[TRUNC1]](s1), [[TRUNC1]](s1), [[TRUNC1]](s1), [[TRUNC1]](s1)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[BUILD_VECTOR1]](<4 x s1>)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[ANYEXT1]]
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[XOR]](<4 x s16>)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond0(<4 x s1>)
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT2]], [[ANYEXT3]]
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND]](<4 x s16>)
; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond1(<4 x s1>)
; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s1>)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT4]], [[ANYEXT5]]
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND1]](<4 x s16>)
; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC3]](<4 x s1>)
; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s1>)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ANYEXT6]], [[ANYEXT7]]
; CHECK-NEXT: %select:_(<4 x s1>) = G_TRUNC [[OR]](<4 x s16>)
; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_ZEXT %select(<4 x s1>)
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %w0(s32), [[C]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP2]], 1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[COPY2]](s16), [[DEF1]](s16)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[TRUNC]](s16), [[C1]](s64)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<8 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s8>), [[UV5:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY3]](s16), [[COPY4]](s16), [[COPY5]](s16), [[C2]](s16)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[BUILD_VECTOR3]]
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC5]], [[ANYEXT1]]
; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC6]], [[XOR]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT2]], [[BUILD_VECTOR4]]
; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%w0:_(s32) = COPY $w0
Expand Down
232 changes: 156 additions & 76 deletions llvm/test/CodeGen/AArch64/shufflevector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,7 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2i1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v32i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v16i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i1_zeroes
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4i8_zeroes
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v32i8_zeroes
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i16_zeroes
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v16i16_zeroes
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v3i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v3i8_zeroes

Expand Down Expand Up @@ -205,68 +197,142 @@ define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
}

define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
; CHECK-LABEL: shufflevector_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ext v0.8b, v1.8b, v0.8b, #6
; CHECK-NEXT: zip1 v1.4h, v1.4h, v0.4h
; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
; CHECK-SD-LABEL: shufflevector_v4i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: ext v0.8b, v1.8b, v0.8b, #6
; CHECK-SD-NEXT: zip1 v1.4h, v1.4h, v0.4h
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
; CHECK-SD-NEXT: xtn v0.8b, v0.8h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v4i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h2, v0.h[1]
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: mov h3, v1.h[1]
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
; CHECK-GI-NEXT: mov h4, v0.h[2]
; CHECK-GI-NEXT: mov h5, v0.h[3]
; CHECK-GI-NEXT: mov h6, v1.h[3]
; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
; CHECK-GI-NEXT: mov h2, v1.h[2]
; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
; CHECK-GI-NEXT: mov v0.b[2], v4.b[0]
; CHECK-GI-NEXT: mov v1.b[2], v2.b[0]
; CHECK-GI-NEXT: mov v0.b[3], v5.b[0]
; CHECK-GI-NEXT: mov v1.b[3], v6.b[0]
; CHECK-GI-NEXT: mov v0.b[4], v0.b[0]
; CHECK-GI-NEXT: mov v1.b[4], v0.b[0]
; CHECK-GI-NEXT: mov v0.b[5], v0.b[0]
; CHECK-GI-NEXT: mov v1.b[5], v0.b[0]
; CHECK-GI-NEXT: mov v0.b[6], v0.b[0]
; CHECK-GI-NEXT: mov v1.b[6], v0.b[0]
; CHECK-GI-NEXT: mov v0.b[7], v0.b[0]
; CHECK-GI-NEXT: mov v1.b[7], v0.b[0]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
%c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 7>
%d = bitcast <4 x i8> %c to i32
ret i32 %d
}

define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
; CHECK-LABEL: shufflevector_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q2 killed $q2 def $q1_q2
; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: adrp x9, .LCPI16_1
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI16_1]
; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
; CHECK-NEXT: ret
; CHECK-SD-LABEL: shufflevector_v32i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
; CHECK-SD-NEXT: adrp x8, .LCPI16_0
; CHECK-SD-NEXT: adrp x9, .LCPI16_1
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1]
; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v32i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov v3.16b, v0.16b
; CHECK-GI-NEXT: adrp x8, .LCPI16_1
; CHECK-GI-NEXT: adrp x9, .LCPI16_0
; CHECK-GI-NEXT: mov v4.16b, v2.16b
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1]
; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0]
; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
ret <32 x i8> %c
}

define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
; CHECK-LABEL: shufflevector_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strh w9, [sp, #12]
; CHECK-NEXT: strh w8, [sp, #14]
; CHECK-NEXT: ldr w0, [sp, #12]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
; CHECK-SD-LABEL: shufflevector_v2i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
; CHECK-SD-NEXT: mov w8, v0.s[1]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: strh w9, [sp, #12]
; CHECK-SD-NEXT: strh w8, [sp, #14]
; CHECK-SD-NEXT: ldr w0, [sp, #12]
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v2i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov s2, v0.s[1]
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: mov s3, v1.s[1]
; CHECK-GI-NEXT: adrp x8, .LCPI17_0
; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
; CHECK-GI-NEXT: mov v0.h[2], v0.h[0]
; CHECK-GI-NEXT: mov v1.h[2], v0.h[0]
; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v0.h[0]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
%c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 1, i32 2>
%d = bitcast <2 x i16> %c to i32
ret i32 %d
}

define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){
; CHECK-LABEL: shufflevector_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q2 killed $q2 def $q1_q2
; CHECK-NEXT: adrp x8, .LCPI18_0
; CHECK-NEXT: adrp x9, .LCPI18_1
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI18_1]
; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
; CHECK-NEXT: ret
; CHECK-SD-LABEL: shufflevector_v16i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
; CHECK-SD-NEXT: adrp x8, .LCPI18_0
; CHECK-SD-NEXT: adrp x9, .LCPI18_1
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1]
; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v16i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov v3.16b, v0.16b
; CHECK-GI-NEXT: adrp x8, .LCPI18_1
; CHECK-GI-NEXT: adrp x9, .LCPI18_0
; CHECK-GI-NEXT: mov v4.16b, v2.16b
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1]
; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0]
; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
ret <16 x i16> %c
}
Expand Down Expand Up @@ -332,16 +398,23 @@ define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
}

define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){
; CHECK-LABEL: shufflevector_v4i8_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.4h, v0.h[0]
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
; CHECK-SD-LABEL: shufflevector_v4i8_zeroes:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.4h, v0.h[0]
; CHECK-SD-NEXT: xtn v0.8b, v0.8h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v4i8_zeroes:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: dup v0.8b, w8
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
%c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%d = bitcast <4 x i8> %c to i32
ret i32 %d
Expand All @@ -358,19 +431,26 @@ define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){
}

define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
; CHECK-LABEL: shufflevector_v2i16_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v1.2s, v0.s[0]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strh w9, [sp, #12]
; CHECK-NEXT: mov w8, v1.s[1]
; CHECK-NEXT: strh w8, [sp, #14]
; CHECK-NEXT: ldr w0, [sp, #12]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
; CHECK-SD-LABEL: shufflevector_v2i16_zeroes:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v1.2s, v0.s[0]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: strh w9, [sp, #12]
; CHECK-SD-NEXT: mov w8, v1.s[1]
; CHECK-SD-NEXT: strh w8, [sp, #14]
; CHECK-SD-NEXT: ldr w0, [sp, #12]
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v2i16_zeroes:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: dup v0.4h, w8
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
%c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 0, i32 0>
%d = bitcast <2 x i16> %c to i32
ret i32 %d
Expand Down

0 comments on commit a344db7

Please sign in to comment.