diff --git a/src/UTF8.cs b/src/UTF8.cs index 87775b3..a14f96d 100644 --- a/src/UTF8.cs +++ b/src/UTF8.cs @@ -10,6 +10,40 @@ namespace SimdUnicode public static class UTF8 { + // Returns &inputBuffer[inputLength] if the input buffer is valid. + /// <summary> + /// Given an input buffer <paramref name="pInputBuffer"/> of byte length <paramref name="inputLength"/>, + /// returns a pointer to where the first invalid data appears in <paramref name="pInputBuffer"/>. + /// The parameter <paramref name="Utf16CodeUnitCountAdjustment"/> is set according to the content of the valid UTF-8 characters encountered, counting -1 for each 2-byte character, -2 for each 3-byte character, and -3 for each 4-byte character. + /// The parameter <paramref name="ScalarCodeUnitCountAdjustment"/> is set according to the content of the valid UTF-8 characters encountered, counting -1 for each 4-byte character. + /// </summary> + /// <remarks> + /// Returns a pointer to the end of <paramref name="pInputBuffer"/> if the buffer is well-formed. + /// </remarks> + public unsafe static byte* GetPointerToFirstInvalidByte(byte* pInputBuffer, int inputLength, out int Utf16CodeUnitCountAdjustment, out int ScalarCodeUnitCountAdjustment) + { + + if (AdvSimd.Arm64.IsSupported) + { + return GetPointerToFirstInvalidByteArm64(pInputBuffer, inputLength, out Utf16CodeUnitCountAdjustment, out ScalarCodeUnitCountAdjustment); + } + if (Avx2.IsSupported) + { + return GetPointerToFirstInvalidByteAvx2(pInputBuffer, inputLength, out Utf16CodeUnitCountAdjustment, out ScalarCodeUnitCountAdjustment); + } + /*if (Vector512.IsHardwareAccelerated && Avx512Vbmi2.IsSupported) + { + return GetPointerToFirstInvalidByteAvx512(pInputBuffer, inputLength); + }*/ + // if (Ssse3.IsSupported) + // { + // return GetPointerToFirstInvalidByteSse(pInputBuffer, inputLength); + // } + // return GetPointerToFirstInvalidByteScalar(pInputBuffer, inputLength); + + return GetPointerToFirstInvalidByteScalar(pInputBuffer, inputLength, out Utf16CodeUnitCountAdjustment, out ScalarCodeUnitCountAdjustment); + + } // prevents double counting in case there is a toolong error on the edge public static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(byte headerByte) { @@ -885,30 +919,6 @@ public 
unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust( scalarCountAdjustment = TempScalarCountAdjustment + TailScalarCodeUnitCountAdjustment; return pInputBuffer + inputLength; } - public unsafe static byte* GetPointerToFirstInvalidByte(byte* pInputBuffer, int inputLength, out int Utf16CodeUnitCountAdjustment, out int ScalarCodeUnitCountAdjustment) - { - - if (AdvSimd.Arm64.IsSupported) - { - return GetPointerToFirstInvalidByteArm64(pInputBuffer, inputLength, out Utf16CodeUnitCountAdjustment, out ScalarCodeUnitCountAdjustment); - } - if (Avx2.IsSupported) - { - return GetPointerToFirstInvalidByteAvx2(pInputBuffer, inputLength, out Utf16CodeUnitCountAdjustment, out ScalarCodeUnitCountAdjustment); - } - /*if (Vector512.IsHardwareAccelerated && Avx512Vbmi2.IsSupported) - { - return GetPointerToFirstInvalidByteAvx512(pInputBuffer, inputLength); - }*/ - // if (Ssse3.IsSupported) - // { - // return GetPointerToFirstInvalidByteSse(pInputBuffer, inputLength); - // } - // return GetPointerToFirstInvalidByteScalar(pInputBuffer, inputLength); - - return GetPointerToFirstInvalidByteScalar(pInputBuffer, inputLength, out Utf16CodeUnitCountAdjustment, out ScalarCodeUnitCountAdjustment); - - } } }