Skip to content

Commit

Permalink
Merge pull request #4366 from Sonicadvance1/sha256msg2_opt
Browse files Browse the repository at this point in the history
OpcodeDispatcher: Implement SHA256MSG2 using SHA256 operation
  • Loading branch information
lioncash authored Feb 19, 2025
2 parents d22bd9c + 9c9ddef commit 02d7261
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 446 deletions.
17 changes: 17 additions & 0 deletions FEXCore/Source/Interface/Core/JIT/EncryptionOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,23 @@ DEF_OP(VSha256U0) {
}
}

DEF_OP(VSha256U1) {
  // Emits the host `sha256su1` instruction for the VSha256U1 IR op.
  //
  // The register passed as the first argument to `sha256su1` is zeroed with
  // `movi` immediately beforehand, so the IR op's result is a function of
  // Src1 and Src2 only.
  // NOTE(review): per the Arm ISA, SHA256SU1 reads its destination register as
  // an input, which is why it must be cleared first - confirm against the ARM ARM.
  const auto Op = IROp->C<IR::IROp_VSha256U1>();

  const auto Dst = GetVReg(Node);
  const auto Src1 = GetVReg(Op->Src1.ID());
  const auto Src2 = GetVReg(Op->Src2.ID());

  // Dst may only be zeroed in place when it aliases NEITHER source; otherwise
  // the `movi` would destroy an input before `sha256su1` consumes it.
  // (The previous check compared against Src1 twice and never tested Src2.)
  if (Dst != Src1 && Dst != Src2) {
    movi(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), 0);
    sha256su1(Dst, Src1, Src2);
  } else {
    // Dst overlaps one of the sources: compute into the temporary and copy
    // the finished result into Dst afterwards.
    movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0);
    sha256su1(VTMP1, Src1, Src2);
    mov(Dst.Q(), VTMP1.Q());
  }
}

DEF_OP(PCLMUL) {
const auto Op = IROp->C<IR::IROp_PCLMUL>();
[[maybe_unused]] const auto OpSize = IROp->Size;
Expand Down
35 changes: 22 additions & 13 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,19 +222,28 @@ void OpDispatchBuilder::SHA256MSG2Op(OpcodeArgs) {
Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);

auto W14 = _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Src, 2);
auto W15 = _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Src, 3);
auto W16 = _Add(OpSize::i32Bit, _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Dest, 0), Sigma1(W14));
auto W17 = _Add(OpSize::i32Bit, _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Dest, 1), Sigma1(W15));
auto W18 = _Add(OpSize::i32Bit, _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Dest, 2), Sigma1(W16));
auto W19 = _Add(OpSize::i32Bit, _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Dest, 3), Sigma1(W17));

auto D3 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 3, Dest, W19);
auto D2 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 2, D3, W18);
auto D1 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 1, D2, W17);
auto D0 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 0, D1, W16);

StoreResult(FPRClass, Op, D0, OpSize::iInvalid);
// Value that is written back to the destination operand by StoreResult at the end.
Ref Result;
if (CTX->HostFeatures.SupportsSHA) {
// Host-SHA path: build the two operands of the VSha256U1 IR op from Dest and Src.
// Src1: VExtr of Dest with itself at 32-bit index 3.
// NOTE(review): presumably a 32-bit element rotation of Dest - confirm against the VExtr definition.
auto Src1 = _VExtr(OpSize::i128Bit, OpSize::i32Bit, Dest, Dest, 3);
// DupDst: 32-bit element 3 of Dest duplicated across the vector.
auto DupDst = _VDupElement(OpSize::i128Bit, OpSize::i32Bit, Dest, 3);
// Src2: VZip2 with 64-bit elements over DupDst and Src.
// NOTE(review): presumably { upper half of DupDst, upper half of Src } - confirm against the VZip2 definition.
auto Src2 = _VZip2(OpSize::i128Bit, OpSize::i64Bit, DupDst, Src);

Result = _VSha256U1(Src1, Src2);
} else {
// Fallback path: compute the four 32-bit result words in GPRs.
// W14/W15 are 32-bit elements 2 and 3 of Src.
auto W14 = _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Src, 2);
auto W15 = _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Src, 3);
// W16..W19 = Dest[0..3] + Sigma1(word two positions earlier); W18 and W19
// consume the freshly computed W16 and W17, so these must stay in this order.
auto W16 = _Add(OpSize::i32Bit, _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Dest, 0), Sigma1(W14));
auto W17 = _Add(OpSize::i32Bit, _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Dest, 1), Sigma1(W15));
auto W18 = _Add(OpSize::i32Bit, _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Dest, 2), Sigma1(W16));
auto W19 = _Add(OpSize::i32Bit, _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Dest, 3), Sigma1(W17));

// Insert the four words into 32-bit elements 3..0, starting from Dest and
// chaining each insert on the previous result.
auto D3 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 3, Dest, W19);
auto D2 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 2, D3, W18);
auto D1 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 1, D2, W17);
Result = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 0, D1, W16);
}

// Both paths converge here: store whichever Result was produced.
StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
}

Ref OpDispatchBuilder::BitwiseAtLeastTwo(Ref A, Ref B, Ref C) {
Expand Down
5 changes: 5 additions & 0 deletions FEXCore/Source/Interface/IR/IR.json
Original file line number Diff line number Diff line change
Expand Up @@ -2658,6 +2658,11 @@
},
"FPR = VSha256U0 FPR:$Src1, FPR:$Src2": {
"Desc": "Does vector scalar VSha256U0 instruction",
"DestSize": "FEXCore::IR::OpSize::i128Bit",
"TiedSource": 0
},
"FPR = VSha256U1 FPR:$Src1, FPR:$Src2": {
"Desc": "Does vector scalar VSha256U1 instruction",
"DestSize": "FEXCore::IR::OpSize::i128Bit"
},
"GPR = CRC32 GPR:$Src1, GPR:$Src2, OpSize:$SrcSize": {
Expand Down
13 changes: 13 additions & 0 deletions unittests/InstructionCountCI/Crypto/H0F38.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@
"unimplemented (Unimplemented)"
]
},
"sha256msg2 xmm0, xmm1": {
"ExpectedInstructionCount": 5,
"Comment": [
"0x66 0x0f 0x38 0xcd"
],
"ExpectedArm64ASM": [
"ext v2.16b, v16.16b, v16.16b, #12",
"dup v3.4s, v16.s[3]",
"zip2 v3.2d, v3.2d, v17.2d",
"movi v16.2d, #0x0",
"unimplemented (Unimplemented)"
]
},
"aesimc xmm0, xmm1": {
"ExpectedInstructionCount": 1,
"Comment": [
Expand Down
Loading

0 comments on commit 02d7261

Please sign in to comment.