diff --git a/README.md b/README.md index dba977e..96fcc10 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ To run specific tests, it is helpful to use the filter parameter: ``` -dotnet test --filter TooShortErrorAVX +dotnet test --filter TooShortErrorAvx2 ``` Or to target specific categories: diff --git a/src/UTF8.cs b/src/UTF8.cs index b957b95..87775b3 100644 --- a/src/UTF8.cs +++ b/src/UTF8.cs @@ -687,7 +687,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust( if (processedLength < inputLength) { - byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(32, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment, ref TailScalarCodeUnitCountAdjustment); + byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(processedLength, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment, ref TailScalarCodeUnitCountAdjustment); if (invalidBytePointer != pInputBuffer + inputLength) { utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment; @@ -834,7 +834,7 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust( // hardware: if (AdvSimd.Arm64.MaxAcross(Vector128.AsUInt32(error)).ToScalar() != 0) { - int off = processedLength > 32 ? processedLength - 32 : processedLength;// this does not backup ff processedlength = 32 + int off = processedLength > 32 ? 
processedLength - 32 : processedLength;// note: 32 is subtracted only when processedLength > 32; otherwise off == processedLength byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(off, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment, ref TailScalarCodeUnitCountAdjustment); utf16CodeUnitCountAdjustment = TailUtf16CodeUnitCountAdjustment; scalarCountAdjustment = TailScalarCodeUnitCountAdjustment; @@ -866,13 +866,12 @@ public unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust( } // We have processed all the blocks using SIMD, we need to process the remaining bytes. // Process the remaining bytes with the scalar function - // worst possible case is 4 bytes, where we need to backtrack 3 bytes // 11110xxxx 10xxxxxx 10xxxxxx 10xxxxxx <== we might be pointing at the last byte if (processedLength < inputLength) { - byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(32, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment, ref TailScalarCodeUnitCountAdjustment); + byte* invalidBytePointer = SimdUnicode.UTF8.RewindAndValidateWithErrors(processedLength, pInputBuffer + processedLength, inputLength - processedLength, ref TailUtf16CodeUnitCountAdjustment, ref TailScalarCodeUnitCountAdjustment); if (invalidBytePointer != pInputBuffer + inputLength) { utf16CodeUnitCountAdjustment = TempUtf16CodeUnitCountAdjustment + TailUtf16CodeUnitCountAdjustment; diff --git a/test/UTF8ValidationTests.cs b/test/UTF8ValidationTests.cs index f7dcdf9..9b4a09a 100644 --- a/test/UTF8ValidationTests.cs +++ b/test/UTF8ValidationTests.cs @@ -114,11 +114,17 @@ public void simpleGoodSequencesScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void simpleGoodSequencesAVX() + public void simpleGoodSequencesAvx2() { simpleGoodSequences(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + 
[FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void simpleGoodSequencesArm64() + { + simpleGoodSequences(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } private void BadSequences(Utf8ValidationDelegate utf8ValidationDelegate) { @@ -177,11 +183,18 @@ public void BadSequencesScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void BadSequencesAVX() + public void BadSequencesAvx2() { BadSequences(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void BadSequencesArm64() + { + BadSequences(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } + // this was in the C++ code private void Node48995Test(Utf8ValidationDelegate utf8ValidationDelegate) { @@ -222,11 +235,18 @@ public void NoErrorScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void NoErrorAVX() + public void NoErrorAvx2() { NoError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void NoErrorArm64() + { + NoError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } + private void NoErrorSpecificByteCount(Utf8ValidationDelegate utf8ValidationDelegate) { RunTestForByteLength(1,utf8ValidationDelegate); @@ -268,11 +288,17 @@ public void NoErrorSpecificByteCountScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void NoErrorSpecificByteCountAVX() + public void NoErrorSpecificByteCountAvx2() { NoErrorSpecificByteCount(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void NoErrorSpecificByteCountArm64() + { + 
NoErrorSpecificByteCount(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } private void NoErrorIncompleteThenASCII(Utf8ValidationDelegate utf8ValidationDelegate) { foreach (int outputLength in outputLengths){ @@ -319,12 +345,18 @@ public void NoErrorIncompleteThenASCIIScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void NoErrorIncompleteThenASCIIAVX() + public void NoErrorIncompleteThenASCIIAvx2() { NoErrorIncompleteThenASCII(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void NoErrorIncompleteThenASCIIArm64() + { + NoErrorIncompleteThenASCII(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } private void NoErrorIncompleteAt256Vector(Utf8ValidationDelegate utf8ValidationDelegate) { @@ -370,11 +402,18 @@ public void NoErrorIncompleteAt256VectorScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void NoErrorIncompleteAt256VectorAVX() + public void NoErrorIncompleteAt256VectorAvx2() { NoErrorIncompleteAt256Vector(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void NoErrorIncompleteAt256VectorArm64() + { + NoErrorIncompleteAt256Vector(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } + private void BadHeaderBits(Utf8ValidationDelegate utf8ValidationDelegate) { foreach (int outputLength in outputLengths) @@ -419,11 +458,18 @@ public void BadHeaderBitsScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void BadHeaderBitsAVX() + public void BadHeaderBitsAvx2() { BadHeaderBits(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void 
BadHeaderBitsArm64() + { + BadHeaderBits(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } + private void TooShortError(Utf8ValidationDelegate utf8ValidationDelegate) { foreach (int outputLength in outputLengths) @@ -467,11 +513,18 @@ public void TooShortErrorScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void TooShortErrorAVX() + public void TooShortErrorAvx2() { TooShortError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void TooShortErrorArm64() + { + TooShortError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } + private void TooLongError(Utf8ValidationDelegate utf8ValidationDelegate) { @@ -515,11 +568,18 @@ public void TooLongErrorScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void TooLongErrorAVX() + public void TooLongErrorAvx2() { TooLongError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void TooLongErrorArm64() + { + TooLongError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } + private void OverlongError(Utf8ValidationDelegate utf8ValidationDelegate) { for (int trial = 0; trial < NumTrials; trial++) @@ -570,11 +630,11 @@ public void OverlongErrorScalar() OverlongError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar); } - [Trait("Category", "avx")] - [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void OverlongErrorAVX() + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void OverlongErrorArm64() { - OverlongError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); + OverlongError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); } @@ -632,12 +692,6 @@ public void 
TooShortErrorAtEndScalar() TooShortErrorAtEnd(SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar); } - [Trait("Category", "avx")] - [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void TooShortErrorAtEndAVX() - { - TooShortErrorAtEnd(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); - } [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] @@ -646,6 +700,12 @@ public void TooShortErrorAtEndAvx2() TooShortErrorAtEnd(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void TooShortErrorAtEndArm64() + { + TooShortErrorAtEnd(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } //corresponds to condition 5.4.1 in the paper private void Invalid0xf50xff(Utf8ValidationDelegate utf8ValidationDelegate) @@ -675,18 +735,20 @@ public void Invalid0xf50xffScalar() Invalid0xf50xff(SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar); } + [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void Invalid0xf50xffAVX() + public void Invalid0xf50xffAvx2() { Invalid0xf50xff(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } - [Trait("Category", "avx")] - [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void Invalid0xf50xffAvx2() + + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void Invalid0xf50xffArm64() { - Invalid0xf50xff(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); + Invalid0xf50xff(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); } // helper function for debugging: it prints a green byte every 32 bytes and a red byte at a given index @@ -793,6 +855,13 @@ public void TooLargeErrorAvx() } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void TooLargeErrorArm64() + { + 
TooLargeError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } + private void AsciiPlusContinuationAtEndError(Utf8ValidationDelegate utf8ValidationDelegate) { foreach (int outputLength in outputLengths) @@ -823,11 +892,11 @@ public void AsciiPlusContinuationAtEndErrorScalar() AsciiPlusContinuationAtEndError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteScalar); } - [Trait("Category", "avx")] - [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void AsciiPlusContinuationAtEndErrorAVX() + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void AsciiPlusContinuationAtEndErrorArm64() { - AsciiPlusContinuationAtEndError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); + AsciiPlusContinuationAtEndError(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); } [Trait("Category", "avx")] @@ -881,11 +950,17 @@ public void SurrogateErrorTestScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void SurrogateErrorTestAVX() + public void SurrogateErrorTestAvx2() { SurrogateErrorTest(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void SurrogateErrorTestArm64() + { + SurrogateErrorTest(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } private void BruteForceTest(Utf8ValidationDelegate utf8ValidationDelegate) { @@ -943,12 +1018,19 @@ public void BruteForceTestScalar() [Trait("Category", "avx")] [FactOnSystemRequirementAttribute(TestSystemRequirements.X64Avx2)] - public void BruteForceTestAVX() + public void BruteForceTestAvx2() { BruteForceTest(SimdUnicode.UTF8.GetPointerToFirstInvalidByteAvx2); } + [Trait("Category", "arm64")] + [FactOnSystemRequirementAttribute(TestSystemRequirements.Arm64)] + public void BruteForceTestArm64() + { + BruteForceTest(SimdUnicode.UTF8.GetPointerToFirstInvalidByteArm64); + } + // credit: 
based on code from Google Fuchsia (Apache Licensed) public static bool ValidateUtf8Fuschia(byte[] data) {