FEX-Emu · lioncash · Feb 23, 2025 · Feb 23, 2025 · Feb 23, 2025 · Feb 23, 2025
diff --git a/CodeEmitter/CodeEmitter/SVEOps.inl b/CodeEmitter/CodeEmitter/SVEOps.inl
@@ -5029,7 +5029,7 @@ private:
   void SVE2IntegerMultiplyLong(uint32_t SUT, SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
     // PMULLB and PMULLT support the use of 128-bit element sizes (with the SVE2PMULL128 extension)
     if (SUT == 0b010 || SUT == 0b011) {
-      LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Can't use 8-bit element size");
+      LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i32Bit, "Can't use 8-bit or 32-bit element size");
 
       // 128-bit variant is encoded as if it were 8-bit (0b00)
       if (size == SubRegSize::i128Bit) {

diff --git a/External/vixl b/External/vixl
diff --git a/FEXCore/unittests/Emitter/SVE_Tests.cpp b/FEXCore/unittests/Emitter/SVE_Tests.cpp
@@ -2963,12 +2963,12 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer multiply long") {
 
   // TEST_SINGLE(pmullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.b, z29.b, z28.b");
   TEST_SINGLE(pmullb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.h, z29.b, z28.b");
-  TEST_SINGLE(pmullb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.s, z29.h, z28.h");
+  // TEST_SINGLE(pmullb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.s, z29.h, z28.h");
   TEST_SINGLE(pmullb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.d, z29.s, z28.s");
 
   // TEST_SINGLE(pmullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.b, z29.b, z28.b");
   TEST_SINGLE(pmullt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.h, z29.b, z28.b");
-  TEST_SINGLE(pmullt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.s, z29.h, z28.h");
+  // TEST_SINGLE(pmullt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.s, z29.h, z28.h");
   TEST_SINGLE(pmullt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.d, z29.s, z28.s");
 
   // TEST_SINGLE(smullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullb z30.b, z29.b, z28.b");

diff --git a/FEXCore/unittests/Emitter/System_Tests.cpp b/FEXCore/unittests/Emitter/System_Tests.cpp
@@ -110,15 +110,14 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: System: Barriers") {
 
   TEST_SINGLE(isb(), "isb");
 
-  // vixl has a decoding bug claiming these are system level instructions.
-  TEST_SINGLE(sb(), "sb (System)");
-  TEST_SINGLE(tcommit(), "tcommit (System)");
+  TEST_SINGLE(sb(), "sb");
+  TEST_SINGLE(tcommit(), "tcommit");
 }
 TEST_CASE_METHOD(TestDisassembler, "Emitter: System: System register move") {
   // vixl doesn't have decoding for a bunch of these.
   // Also most of these aren't writeable from el0, just testing the encoding.
   TEST_SINGLE(msr(SystemRegister::CTR_EL0, Reg::r30), "msr S3_3_c0_c0_1, x30");
-  TEST_SINGLE(msr(SystemRegister::DCZID_EL0, Reg::r30), "msr S3_3_c0_c0_7, x30");
+  TEST_SINGLE(msr(SystemRegister::DCZID_EL0, Reg::r30), "msr dczid_el0, x30");
   TEST_SINGLE(msr(SystemRegister::TPIDR_EL0, Reg::r30), "msr S3_3_c13_c0_2, x30");
   TEST_SINGLE(msr(SystemRegister::RNDR, Reg::r30), "msr rndr, x30");
   TEST_SINGLE(msr(SystemRegister::RNDRRS, Reg::r30), "msr rndrrs, x30");
@@ -129,7 +128,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: System: System register move") {
   TEST_SINGLE(msr(SystemRegister::CNTVCT_EL0, Reg::r30), "msr S3_3_c14_c0_2, x30");
 
   TEST_SINGLE(mrs(Reg::r30, SystemRegister::CTR_EL0), "mrs x30, S3_3_c0_c0_1");
-  TEST_SINGLE(mrs(Reg::r30, SystemRegister::DCZID_EL0), "mrs x30, S3_3_c0_c0_7");
+  TEST_SINGLE(mrs(Reg::r30, SystemRegister::DCZID_EL0), "mrs x30, dczid_el0");
   TEST_SINGLE(mrs(Reg::r30, SystemRegister::TPIDR_EL0), "mrs x30, S3_3_c13_c0_2");
   TEST_SINGLE(mrs(Reg::r30, SystemRegister::RNDR), "mrs x30, rndr");
   TEST_SINGLE(mrs(Reg::r30, SystemRegister::RNDRRS), "mrs x30, rndrrs");

diff --git a/Source/Common/HostFeatures.cpp b/Source/Common/HostFeatures.cpp
@@ -543,10 +543,14 @@ FEXCore::HostFeatures FetchHostFeatures(FEX::CPUFeatures& Features, bool Support
 #endif
 
 #ifdef VIXL_SIMULATOR
-  // simulator doesn't support dc(ZVA)
-  HostFeatures.SupportsCLZERO = false;
-  // Simulator doesn't support SHA
-  HostFeatures.SupportsSHA = false;
+  // The simulator has a hardcoded ZVA size of 64 bytes.
+  HostFeatures.SupportsCLZERO = true;
+  HostFeatures.SupportsAES = true;
+  HostFeatures.SupportsCRC = true;
+  HostFeatures.SupportsAVX = true;
+  HostFeatures.SupportsSHA = true;
+  HostFeatures.SupportsPMULL_128Bit = true;
+  HostFeatures.SupportsAES256 = true;
 #else
   // Check if we can support cacheline clears
   uint32_t DCZID = GetDCZID();

diff --git a/unittests/ASM/Disabled_Tests_Simulator b/unittests/ASM/Disabled_Tests_Simulator
@@ -10,27 +10,13 @@ Test_X87/precision_test_neg_ftan.asm
 Test_X87/precision_test_neg_fatan.asm
 Test_X87/precision_test_neg_fyl2xp1.asm
 
-# AES unsupported in simulator
-Test_H0F38/66_DB.asm
-Test_H0F38/66_DC.asm
-Test_H0F38/66_DD.asm
-Test_H0F38/66_DE.asm
-Test_H0F38/66_DF.asm
-Test_H0F3A/0_66_DF.asm
+# AES and vpclmulqdq are currently unsupported with 256-bit SVE
 Test_VEX/vaesdec.asm
 Test_VEX/vaesdeclast.asm
-Test_VEX/vaesenc.asm
-Test_VEX/vaesenclast.asm
-Test_VEX/vaesimc.asm
 Test_VEX/vaesdec256.asm
 Test_VEX/vaesdeclast256.asm
 Test_VEX/vaesenc256.asm
 Test_VEX/vaesenclast256.asm
-Test_VEX/vaeskeygenassist.asm
-
-# PCMUL considered to be part of crypto operations. Simulator doesn't support this.
-Test_H0F3A/pclmulqdq.asm
-Test_VEX/vpclmulqdq.asm
 Test_VEX/vpclmulqdq_256.asm
 
 # Simulator can't handle self-modifying code

diff --git a/unittests/InstructionCountCI/AVX128/VEX_map2.json b/unittests/InstructionCountCI/AVX128/VEX_map2.json
@@ -5920,7 +5920,7 @@
         "Map 2 0b01 0xdb 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "unimplemented (Unimplemented)",
+        "aesimc v16.16b, v17.16b",
         "movi v2.2d, #0x0",
         "str q2, [x28, #16]"
       ]
@@ -5933,8 +5933,8 @@
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
         "mov v0.16b, v17.16b",
-        "unimplemented (Unimplemented)",
-        "unimplemented (Unimplemented)",
+        "aese v0.16b, v2.16b",
+        "aesmc v0.16b, v0.16b",
         "eor v16.16b, v0.16b, v18.16b",
         "str q2, [x28, #16]"
       ]
@@ -5949,12 +5949,12 @@
         "ldr q3, [x28, #32]",
         "ldr q4, [x28, #48]",
         "mov v0.16b, v17.16b",
-        "unimplemented (Unimplemented)",
-        "unimplemented (Unimplemented)",
+        "aese v0.16b, v2.16b",
+        "aesmc v0.16b, v0.16b",
         "eor v16.16b, v0.16b, v18.16b",
         "mov v0.16b, v3.16b",
-        "unimplemented (Unimplemented)",
-        "unimplemented (Unimplemented)",
+        "aese v0.16b, v2.16b",
+        "aesmc v0.16b, v0.16b",
         "eor v2.16b, v0.16b, v4.16b",
         "str q2, [x28, #16]"
       ]
@@ -5967,7 +5967,7 @@
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
         "mov v0.16b, v17.16b",
-        "unimplemented (Unimplemented)",
+        "aese v0.16b, v2.16b",
         "eor v16.16b, v0.16b, v18.16b",
         "str q2, [x28, #16]"
       ]
@@ -5982,10 +5982,10 @@
         "ldr q3, [x28, #32]",
         "ldr q4, [x28, #48]",
         "mov v0.16b, v17.16b",
-        "unimplemented (Unimplemented)",
+        "aese v0.16b, v2.16b",
         "eor v16.16b, v0.16b, v18.16b",
         "mov v0.16b, v3.16b",
-        "unimplemented (Unimplemented)",
+        "aese v0.16b, v2.16b",
         "eor v2.16b, v0.16b, v4.16b",
         "str q2, [x28, #16]"
       ]
@@ -5998,8 +5998,8 @@
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
         "mov v0.16b, v17.16b",
-        "unimplemented (Unimplemented)",
-        "unimplemented (Unimplemented)",
+        "aesd v0.16b, v2.16b",
+        "aesimc v0.16b, v0.16b",
         "eor v16.16b, v0.16b, v18.16b",
         "str q2, [x28, #16]"
       ]
@@ -6014,12 +6014,12 @@
         "ldr q3, [x28, #32]",
         "ldr q4, [x28, #48]",
         "mov v0.16b, v17.16b",
-        "unimplemented (Unimplemented)",
-        "unimplemented (Unimplemented)",
+        "aesd v0.16b, v2.16b",
+        "aesimc v0.16b, v0.16b",
         "eor v16.16b, v0.16b, v18.16b",
         "mov v0.16b, v3.16b",
-        "unimplemented (Unimplemented)",
-        "unimplemented (Unimplemented)",
+        "aesd v0.16b, v2.16b",
+        "aesimc v0.16b, v0.16b",
         "eor v2.16b, v0.16b, v4.16b",
         "str q2, [x28, #16]"
       ]
@@ -6032,7 +6032,7 @@
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
         "mov v0.16b, v17.16b",
-        "unimplemented (Unimplemented)",
+        "aesd v0.16b, v2.16b",
         "eor v16.16b, v0.16b, v18.16b",
         "str q2, [x28, #16]"
       ]
@@ -6047,10 +6047,10 @@
         "ldr q3, [x28, #32]",
         "ldr q4, [x28, #48]",
         "mov v0.16b, v17.16b",
-        "unimplemented (Unimplemented)",
+        "aesd v0.16b, v2.16b",
         "eor v16.16b, v0.16b, v18.16b",
         "mov v0.16b, v3.16b",
-        "unimplemented (Unimplemented)",
+        "aesd v0.16b, v2.16b",
         "eor v2.16b, v0.16b, v4.16b",
         "str q2, [x28, #16]"
       ]

diff --git a/unittests/InstructionCountCI/AVX128/VEX_map3.json b/unittests/InstructionCountCI/AVX128/VEX_map3.json
@@ -4024,7 +4024,7 @@
         "movi v2.2d, #0x0",
         "ldr q3, [x28, #2576]",
         "mov v16.16b, v17.16b",
-        "unimplemented (Unimplemented)",
+        "aese v16.16b, v2.16b",
         "tbl v16.16b, {v16.16b}, v3.16b",
         "str q2, [x28, #16]"
       ]
@@ -4038,7 +4038,7 @@
         "movi v2.2d, #0x0",
         "ldr q3, [x28, #2576]",
         "mov v16.16b, v17.16b",
-        "unimplemented (Unimplemented)",
+        "aese v16.16b, v2.16b",
         "tbl v16.16b, {v16.16b}, v3.16b",
         "mov x0, #0xff00000000",
         "dup v1.2d, x0",

diff --git a/unittests/InstructionCountCI/Crypto/H0F38.json b/unittests/InstructionCountCI/Crypto/H0F38.json
@@ -18,7 +18,7 @@
       ],
       "ExpectedArm64ASM": [
         "dup v2.4s, v16.s[3]",
-        "unimplemented (Unimplemented)",
+        "sha1h s2, s2",
         "dup v2.4s, v2.s[0]",
         "add v2.4s, v17.4s, v2.4s",
         "mov v16.16b, v17.16b",
@@ -35,7 +35,7 @@
         "ext v2.16b, v2.16b, v2.16b, #8",
         "rev64 v3.4s, v17.4s",
         "ext v3.16b, v3.16b, v3.16b, #8",
-        "unimplemented (Unimplemented)",
+        "sha1su1 v2.4s, v3.4s",
         "rev64 v2.4s, v2.4s",
         "ext v16.16b, v2.16b, v2.16b, #8"
       ]
@@ -46,7 +46,7 @@
         "0x66 0x0f 0x38 0xcc"
       ],
       "ExpectedArm64ASM": [
-        "unimplemented (Unimplemented)"
+        "sha256su0 v16.4s, v17.4s"
       ]
     },
     "sha256msg2 xmm0, xmm1": {
@@ -59,7 +59,7 @@
         "dup v3.4s, v16.s[3]",
         "zip2 v3.2d, v3.2d, v17.2d",
         "movi v16.2d, #0x0",
-        "unimplemented (Unimplemented)"
+        "sha256su1 v16.4s, v2.4s, v3.4s"
       ]
     },
     "aesimc xmm0, xmm1": {
@@ -68,7 +68,7 @@
         "0x66 0x0f 0x38 0xdb"
       ],
       "ExpectedArm64ASM": [
-        "unimplemented (Unimplemented)"
+        "aesimc v16.16b, v17.16b"
       ]
     },
     "aesenc xmm0, xmm1": {
@@ -78,8 +78,8 @@
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "unimplemented (Unimplemented)",
-        "unimplemented (Unimplemented)",
+        "aese v16.16b, v2.16b",
+        "aesmc v16.16b, v16.16b",
         "eor v16.16b, v16.16b, v17.16b"
       ]
     },
@@ -90,7 +90,7 @@
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "unimplemented (Unimplemented)",
+        "aese v16.16b, v2.16b",
         "eor v16.16b, v16.16b, v17.16b"
       ]
     },
@@ -101,8 +101,8 @@
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "unimplemented (Unimplemented)",
-        "unimplemented (Unimplemented)",
+        "aesd v16.16b, v2.16b",
+        "aesimc v16.16b, v16.16b",
         "eor v16.16b, v16.16b, v17.16b"
       ]
     },
@@ -113,7 +113,7 @@
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "unimplemented (Unimplemented)",
+        "aesd v16.16b, v2.16b",
         "eor v16.16b, v16.16b, v17.16b"
       ]
     },

diff --git a/unittests/InstructionCountCI/Crypto/H0F3A.json b/unittests/InstructionCountCI/Crypto/H0F3A.json
@@ -58,7 +58,7 @@
         "ldr q2, [x28, #2576]",
         "movi v3.2d, #0x0",
         "mov v16.16b, v17.16b",
-        "unimplemented (Unimplemented)",
+        "aese v16.16b, v3.16b",
         "tbl v16.16b, {v16.16b}, v2.16b"
       ]
     },
@@ -71,7 +71,7 @@
         "ldr q2, [x28, #2576]",
         "movi v3.2d, #0x0",
         "mov v16.16b, v17.16b",
-        "unimplemented (Unimplemented)",
+        "aese v16.16b, v3.16b",
         "tbl v16.16b, {v16.16b}, v2.16b",
         "mov x0, #0xff00000000",
         "dup v1.2d, x0",
@@ -91,7 +91,7 @@
         "rev64 v5.4s, v17.4s",
         "ext v5.16b, v5.16b, v5.16b, #8",
         "add v2.4s, v5.4s, v2.4s",
-        "unimplemented (Unimplemented)",
+        "sha1c q4, s3, v2.4s",
         "rev64 v2.4s, v4.4s",
         "ext v16.16b, v2.16b, v2.16b, #8"
       ]
@@ -109,7 +109,7 @@
         "rev64 v5.4s, v17.4s",
         "ext v5.16b, v5.16b, v5.16b, #8",
         "add v2.4s, v5.4s, v2.4s",
-        "unimplemented (Unimplemented)",
+        "sha1p q4, s3, v2.4s",
         "rev64 v2.4s, v4.4s",
         "ext v16.16b, v2.16b, v2.16b, #8"
       ]
@@ -127,7 +127,7 @@
         "rev64 v5.4s, v17.4s",
         "ext v5.16b, v5.16b, v5.16b, #8",
         "add v2.4s, v5.4s, v2.4s",
-        "unimplemented (Unimplemented)",
+        "sha1m q4, s3, v2.4s",
         "rev64 v2.4s, v4.4s",
         "ext v16.16b, v2.16b, v2.16b, #8"
       ]
@@ -145,7 +145,7 @@
         "rev64 v5.4s, v17.4s",
         "ext v5.16b, v5.16b, v5.16b, #8",
         "add v2.4s, v5.4s, v2.4s",
-        "unimplemented (Unimplemented)",
+        "sha1p q4, s3, v2.4s",
         "rev64 v2.4s, v4.4s",
         "ext v16.16b, v2.16b, v2.16b, #8"
       ]
+3 −1		SConstruct
+148 −0		doc/range-limits.md
+5 −10		src/aarch32/instructions-aarch32.cc
+8 −0		src/aarch32/location-aarch32.h
+47 −0		src/aarch32/macro-assembler-aarch32.cc
+6 −0		src/aarch32/macro-assembler-aarch32.h
+295 −2		src/aarch64/assembler-aarch64.cc
+135 −0		src/aarch64/assembler-aarch64.h
+6 −6		src/aarch64/assembler-sve-aarch64.cc
+13 −3		src/aarch64/constants-aarch64.h
+131 −78		src/aarch64/cpu-features-auditor-aarch64.cc
+3 −1		src/aarch64/cpu-features-auditor-aarch64.h
+1 −2		src/aarch64/debugger-aarch64.cc
+1 −1		src/aarch64/debugger-aarch64.h
+1 −1		src/aarch64/decoder-constants-aarch64.h
+28 −35		src/aarch64/decoder-visitor-map-aarch64.h
+145 −37		src/aarch64/disasm-aarch64.cc
+9 −0		src/aarch64/disasm-aarch64.h
+24 −0		src/aarch64/instructions-aarch64.cc
+12 −5		src/aarch64/instructions-aarch64.h
+749 −86		src/aarch64/logic-aarch64.cc
+33 −13		src/aarch64/macro-assembler-aarch64.cc
+94 −3		src/aarch64/macro-assembler-aarch64.h
+1 −1		src/aarch64/pointer-auth-aarch64.cc
+525 −145		src/aarch64/simulator-aarch64.cc
+297 −15		src/aarch64/simulator-aarch64.h
+2 −1		src/cpu-features.h
+22 −1		src/invalset-vixl.h
+1 −1		src/pool-manager-impl.h
+47 −2		src/utils-vixl.h
+20 −18		test/aarch32/test-assembler-aarch32.cc
+35 −0		test/aarch32/test-disasm-a32.cc
+19 −0		test/aarch64/test-api-aarch64.cc
+859 −206		test/aarch64/test-assembler-aarch64.cc
+242 −783		test/aarch64/test-assembler-fp-aarch64.cc
+4 −23		test/aarch64/test-assembler-neon-aarch64.cc
+2 −0		test/aarch64/test-assembler-sve-aarch64.cc
+79 −0		test/aarch64/test-cpu-features-aarch64.cc
+37 −25		test/aarch64/test-disasm-aarch64.cc
+98 −0		test/aarch64/test-disasm-neon-aarch64.cc
+8 −3		test/aarch64/test-disasm-sve-aarch64.cc
+1 −1		test/aarch64/test-simulator-aarch64.cc
+1,644 −0		test/aarch64/test-simulator-sve-aarch64.cc
+125 −0		test/aarch64/test-simulator-sve2-aarch64.cc
+38 −0		test/aarch64/test-utils-aarch64.cc
+8 −0		test/aarch64/test-utils-aarch64.h
+22 −0		test/test-invalset.cc
+2 −2		test/test-pool-manager.cc
+9 −0		tools/code_coverage.log
+2 −2		tools/lint.py
+2 −2		tools/util.py