Skip to content

Commit

Permalink
Add support for Sve.StoreNonTemporal() (#102769)
Browse files Browse the repository at this point in the history
* Add support for Sve.StoreNonTemporal()

* Merge test templates

* Remove redundant template
  • Loading branch information
SwapnilGaikwad authored May 29, 2024
1 parent 5b0d959 commit 6f96391
Show file tree
Hide file tree
Showing 9 changed files with 196 additions and 373 deletions.
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26790,8 +26790,10 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore(GenTree** pAddr) const
case NI_Sve_StoreAndZipx3:
case NI_Sve_StoreAndZipx4:
case NI_Sve_StoreNarrowing:
case NI_Sve_StoreNonTemporal:
addr = Op(2);
break;

#endif // TARGET_ARM64

default:
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1436,6 +1436,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}

case NI_Sve_StoreAndZip:
case NI_Sve_StoreNonTemporal:
{
GetEmitter()->emitIns_R_R_R_I(ins, emitSize, op3Reg, op1Reg, op2Reg, 0, opt);
break;
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ HARDWARE_INTRINSIC(Sve, SignExtendWideningLower,
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreNarrowing, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreNonTemporal, -1, 3, true, {INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, true, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3255,6 +3255,69 @@ internal Arm64() { }
public static unsafe void StoreNarrowing(Vector<ulong> mask, uint* address, Vector<ulong> data) { throw new PlatformNotSupportedException(); }


/// Non-truncating store, non-temporal

/// <summary>
/// void svstnt1[_u8](svbool_t pg, uint8_t *base, svuint8_t data)
/// STNT1B Zdata.B, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<byte> mask, byte* address, Vector<byte> data) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svstnt1[_f64](svbool_t pg, float64_t *base, svfloat64_t data)
/// STNT1D Zdata.D, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<double> mask, double* address, Vector<double> data) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svstnt1[_s16](svbool_t pg, int16_t *base, svint16_t data)
/// STNT1H Zdata.H, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<short> mask, short* address, Vector<short> data) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svstnt1[_s32](svbool_t pg, int32_t *base, svint32_t data)
/// STNT1W Zdata.S, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<int> mask, int* address, Vector<int> data) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svstnt1[_s64](svbool_t pg, int64_t *base, svint64_t data)
/// STNT1D Zdata.D, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<long> mask, long* address, Vector<long> data) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svstnt1[_s8](svbool_t pg, int8_t *base, svint8_t data)
/// STNT1B Zdata.B, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<sbyte> mask, sbyte* address, Vector<sbyte> data) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svstnt1[_f32](svbool_t pg, float32_t *base, svfloat32_t data)
/// STNT1W Zdata.S, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<float> mask, float* address, Vector<float> data) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svstnt1[_u16](svbool_t pg, uint16_t *base, svuint16_t data)
/// STNT1H Zdata.H, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<ushort> mask, ushort* address, Vector<ushort> data) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svstnt1[_u32](svbool_t pg, uint32_t *base, svuint32_t data)
/// STNT1W Zdata.S, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<uint> mask, uint* address, Vector<uint> data) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svstnt1[_u64](svbool_t pg, uint64_t *base, svuint64_t data)
/// STNT1D Zdata.D, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<ulong> mask, ulong* address, Vector<ulong> data) { throw new PlatformNotSupportedException(); }


/// Subtract : Subtract

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3351,6 +3351,69 @@ internal Arm64() { }
public static unsafe void StoreNarrowing(Vector<ulong> mask, uint* address, Vector<ulong> data) => StoreNarrowing(mask, address, data);


/// StoreNonTemporal : Non-truncating store, non-temporal

/// <summary>
/// void svstnt1[_u8](svbool_t pg, uint8_t *base, svuint8_t data)
/// STNT1B Zdata.B, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<byte> mask, byte* address, Vector<byte> data) => StoreNonTemporal(mask, address, data);

/// <summary>
/// void svstnt1[_f64](svbool_t pg, float64_t *base, svfloat64_t data)
/// STNT1D Zdata.D, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<double> mask, double* address, Vector<double> data) => StoreNonTemporal(mask, address, data);

/// <summary>
/// void svstnt1[_s16](svbool_t pg, int16_t *base, svint16_t data)
/// STNT1H Zdata.H, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<short> mask, short* address, Vector<short> data) => StoreNonTemporal(mask, address, data);

/// <summary>
/// void svstnt1[_s32](svbool_t pg, int32_t *base, svint32_t data)
/// STNT1W Zdata.S, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<int> mask, int* address, Vector<int> data) => StoreNonTemporal(mask, address, data);

/// <summary>
/// void svstnt1[_s64](svbool_t pg, int64_t *base, svint64_t data)
/// STNT1D Zdata.D, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<long> mask, long* address, Vector<long> data) => StoreNonTemporal(mask, address, data);

/// <summary>
/// void svstnt1[_s8](svbool_t pg, int8_t *base, svint8_t data)
/// STNT1B Zdata.B, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<sbyte> mask, sbyte* address, Vector<sbyte> data) => StoreNonTemporal(mask, address, data);

/// <summary>
/// void svstnt1[_f32](svbool_t pg, float32_t *base, svfloat32_t data)
/// STNT1W Zdata.S, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<float> mask, float* address, Vector<float> data) => StoreNonTemporal(mask, address, data);

/// <summary>
/// void svstnt1[_u16](svbool_t pg, uint16_t *base, svuint16_t data)
/// STNT1H Zdata.H, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<ushort> mask, ushort* address, Vector<ushort> data) => StoreNonTemporal(mask, address, data);

/// <summary>
/// void svstnt1[_u32](svbool_t pg, uint32_t *base, svuint32_t data)
/// STNT1W Zdata.S, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<uint> mask, uint* address, Vector<uint> data) => StoreNonTemporal(mask, address, data);

/// <summary>
/// void svstnt1[_u64](svbool_t pg, uint64_t *base, svuint64_t data)
/// STNT1D Zdata.D, Pg, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe void StoreNonTemporal(Vector<ulong> mask, ulong* address, Vector<ulong> data) => StoreNonTemporal(mask, address, data);


/// Subtract : Subtract

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4632,6 +4632,17 @@ internal Arm64() { }
public static unsafe void StoreNarrowing(System.Numerics.Vector<ulong> mask, ushort* address, System.Numerics.Vector<ulong> data) { throw null; }
public static unsafe void StoreNarrowing(System.Numerics.Vector<ulong> mask, uint* address, System.Numerics.Vector<ulong> data) { throw null; }

public static unsafe void StoreNonTemporal(System.Numerics.Vector<byte> mask, byte* address, System.Numerics.Vector<byte> data) { throw null; }
public static unsafe void StoreNonTemporal(System.Numerics.Vector<double> mask, double* address, System.Numerics.Vector<double> data) { throw null; }
public static unsafe void StoreNonTemporal(System.Numerics.Vector<short> mask, short* address, System.Numerics.Vector<short> data) { throw null; }
public static unsafe void StoreNonTemporal(System.Numerics.Vector<int> mask, int* address, System.Numerics.Vector<int> data) { throw null; }
public static unsafe void StoreNonTemporal(System.Numerics.Vector<long> mask, long* address, System.Numerics.Vector<long> data) { throw null; }
public static unsafe void StoreNonTemporal(System.Numerics.Vector<sbyte> mask, sbyte* address, System.Numerics.Vector<sbyte> data) { throw null; }
public static unsafe void StoreNonTemporal(System.Numerics.Vector<float> mask, float* address, System.Numerics.Vector<float> data) { throw null; }
public static unsafe void StoreNonTemporal(System.Numerics.Vector<ushort> mask, ushort* address, System.Numerics.Vector<ushort> data) { throw null; }
public static unsafe void StoreNonTemporal(System.Numerics.Vector<uint> mask, uint* address, System.Numerics.Vector<uint> data) { throw null; }
public static unsafe void StoreNonTemporal(System.Numerics.Vector<ulong> mask, ulong* address, System.Numerics.Vector<ulong> data) { throw null; }

public static System.Numerics.Vector<sbyte> Subtract(System.Numerics.Vector<sbyte> left, System.Numerics.Vector<sbyte> right) { throw null; }
public static System.Numerics.Vector<short> Subtract(System.Numerics.Vector<short> left, System.Numerics.Vector<short> right) { throw null; }
public static System.Numerics.Vector<int> Subtract(System.Numerics.Vector<int> left, System.Numerics.Vector<int> right) { throw null; }
Expand Down
Loading

0 comments on commit 6f96391

Please sign in to comment.