From e4ad7dad4fb12a54461101569d3db6b46192080c Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 5 Jul 2023 15:37:19 -0700 Subject: [PATCH 01/14] merging with main Enabling AVX512 for ASCII.Equals --- .../src/System/Text/Ascii.Equality.cs | 52 ++++++++++++++++++- .../src/System/Text/Ascii.Utility.cs | 17 ++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index a2d60599fae398..0bc2d4aac497fe 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -61,7 +61,45 @@ private static bool Equals(ref TLeft left, ref TRight ri } } } - else if (Avx.IsSupported && length >= (uint)Vector256.Count) + else if (Avx512F.IsSupported && length >= (uint)Vector512.Count) + { + ref TLeft currentLeftSearchSpace = ref left; + ref TLeft oneVectorAwayFromLeftEnd = ref Unsafe.Add(ref currentLeftSearchSpace, length - TLoader.Count512); + ref TRight currentRightSearchSpace = ref right; + ref TRight oneVectorAwayFromRightEnd = ref Unsafe.Add(ref currentRightSearchSpace, length - (uint)Vector512.Count); + + Vector512 leftValues; + Vector512 rightValues; + + // Loop until either we've finished all elements or there's less than a vector's-worth remaining. + do + { + leftValues = TLoader.Load512(ref currentLeftSearchSpace); + rightValues = Vector512.LoadUnsafe(ref currentRightSearchSpace); + + if (leftValues != rightValues || !AllCharsInVectorAreAscii(leftValues | rightValues)) + { + return false; + } + + currentRightSearchSpace = ref Unsafe.Add(ref currentRightSearchSpace, Vector256.Count); + currentLeftSearchSpace = ref Unsafe.Add(ref currentLeftSearchSpace, TLoader.Count256); + } + while (!Unsafe.IsAddressGreaterThan(ref currentRightSearchSpace, ref oneVectorAwayFromRightEnd)); + + // If any elements remain, process the last vector in the search space. + if (length % (uint)Vector256.Count != 0) + { + leftValues = TLoader.Load512(ref oneVectorAwayFromLeftEnd); + rightValues = Vector512.LoadUnsafe(ref oneVectorAwayFromRightEnd); + + if (leftValues != rightValues || !AllCharsInVectorAreAscii(leftValues | rightValues)) + { + return false; + } + } + } + else if (Avx.IsSupported && length >= (uint)Vector256.Count) { ref TLeft currentLeftSearchSpace = ref left; ref TRight currentRightSearchSpace = ref right; @@ -353,8 +391,10 @@ private interface ILoader { static abstract nuint Count128 { get; } static abstract nuint Count256 { get; } + static abstract nuint Count512 { get; } static abstract Vector128 Load128(ref TLeft ptr); static abstract Vector256 Load256(ref TLeft ptr); + static abstract Vector512 Load512(ref TLeft ptr); static abstract bool EqualAndAscii(ref TLeft left, ref TRight right); } @@ -362,6 +402,7 @@ private interface ILoader { public static nuint Count128 => (uint)Vector128.Count; public static nuint Count256 => (uint)Vector256.Count; + public static nuint Count512 => (uint)Vector512.Count; public static Vector128 Load128(ref T ptr) => Vector128.LoadUnsafe(ref ptr); public static Vector256 Load256(ref T ptr) => Vector256.LoadUnsafe(ref ptr); @@ -379,12 +420,15 @@ public static bool EqualAndAscii(ref T left, ref T right) return true; } + + public static Vector512 Load512(ref T ptr) => Vector512.LoadUnsafe(ref ptr); } private readonly struct WideningLoader : ILoader { public static nuint Count128 => sizeof(long); public static nuint Count256 => (uint)Vector128.Count; + public static nuint Count512 => (uint)Vector512.Count; [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Load128(ref byte ptr) @@ -437,6 +481,12 @@ public static bool EqualAndAscii(ref byte utf8, ref ushort utf16) return true; } + + public static Vector512 Load512(ref byte ptr) + { + (Vector256 lower, Vector256 upper) = Vector256.Widen(Vector256.LoadUnsafe(ref ptr)); + return Vector512.Create(lower, upper); + } } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs index 69b3cc125c0865..7682bf2b32a78c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs @@ -1499,6 +1499,23 @@ private static bool AllCharsInVectorAreAscii(Vector256 vector) } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx512F))] + private static bool AllCharsInVectorAreAscii(Vector512 vector) + where T : unmanaged + { + Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(ushort)); + + if (typeof(T) == typeof(byte)) + { + return vector.AsByte().ExtractMostSignificantBits() == 0; + } + else + { + return ((vector.AsUInt16() & Vector512.Create((ushort)0xFF80)) == Vector512.Zero); + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 ExtractAsciiVector(Vector128 vectorFirst, Vector128 vectorSecond) { From 3aa55beb1deb657c43048d568494b1f33c5b6d81 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 5 Jul 2023 17:11:31 -0700 Subject: [PATCH 02/14] Correcting defects in the new Equals for AVX512 case --- .../System.Private.CoreLib/src/System/Text/Ascii.Equality.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index 0bc2d4aac497fe..2607b79582a7a8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -82,8 +82,8 @@ private static bool Equals(ref TLeft left, ref TRight ri return false; } - currentRightSearchSpace = ref Unsafe.Add(ref currentRightSearchSpace, Vector256.Count); - currentLeftSearchSpace = ref Unsafe.Add(ref currentLeftSearchSpace, TLoader.Count256); + currentRightSearchSpace = ref Unsafe.Add(ref currentRightSearchSpace, Vector512.Count); + currentLeftSearchSpace = ref Unsafe.Add(ref currentLeftSearchSpace, TLoader.Count512); } while (!Unsafe.IsAddressGreaterThan(ref currentRightSearchSpace, ref oneVectorAwayFromRightEnd)); From d1912d681c53cf792dcba2136915ff6cee948541 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 5 Jul 2023 17:15:35 -0700 Subject: [PATCH 03/14] Correcting defects --- .../System.Private.CoreLib/src/System/Text/Ascii.Equality.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index 2607b79582a7a8..d6634f3591fbc8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -88,7 +88,7 @@ private static bool Equals(ref TLeft left, ref TRight ri while (!Unsafe.IsAddressGreaterThan(ref currentRightSearchSpace, ref oneVectorAwayFromRightEnd)); // If any elements remain, process the last vector in the search space. - if (length % (uint)Vector256.Count != 0) + if (length % (uint)Vector512.Count != 0) { leftValues = TLoader.Load512(ref oneVectorAwayFromLeftEnd); rightValues = Vector512.LoadUnsafe(ref oneVectorAwayFromRightEnd); From 67d4426cbd719000af8f56ea528820a3031dd4ea Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 6 Jul 2023 12:40:08 -0700 Subject: [PATCH 04/14] Upgrading ASCII.Equality.EqualsIgnoreCase --- .../src/System/Text/Ascii.Equality.cs | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index d6634f3591fbc8..92785a8483b3f5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -236,6 +236,78 @@ private static bool EqualsIgnoreCase(ref TLeft left, ref } } } + else if (Avx512F.IsSupported && length >= (uint)Vector512.Count) + { + ref TLeft currentLeftSearchSpace = ref left; + ref TLeft oneVectorAwayFromLeftEnd = ref Unsafe.Add(ref currentLeftSearchSpace, length - TLoader.Count512); + ref TRight currentRightSearchSpace = ref right; + ref TRight oneVectorAwayFromRightEnd = ref Unsafe.Add(ref currentRightSearchSpace, length - (uint)Vector512.Count); + + Vector512 leftValues; + Vector512 rightValues; + + Vector512 loweringMask = Vector512.Create(TRight.CreateTruncating(0x20)); + Vector512 vecA = Vector512.Create(TRight.CreateTruncating('a')); + Vector512 vecZMinusA = Vector512.Create(TRight.CreateTruncating(('z' - 'a'))); + + // Loop until either we've finished all elements or there's less than a vector's-worth remaining. + do + { + leftValues = TLoader.Load512(ref currentLeftSearchSpace); + rightValues = Vector512.LoadUnsafe(ref currentRightSearchSpace); + + if (!AllCharsInVectorAreAscii(leftValues | rightValues)) + { + return false; + } + + Vector512 notEquals = ~Vector512.Equals(leftValues, rightValues); + + if (notEquals != Vector512.Zero) + { + // not exact match + + leftValues |= loweringMask; + rightValues |= loweringMask; + + if (Vector512.GreaterThanAny((leftValues - vecA) & notEquals, vecZMinusA) || leftValues != rightValues) + { + return false; // first input isn't in [A-Za-z], and not exact match of lowered + } + } + + currentRightSearchSpace = ref Unsafe.Add(ref currentRightSearchSpace, (uint)Vector512.Count); + currentLeftSearchSpace = ref Unsafe.Add(ref currentLeftSearchSpace, TLoader.Count512); + } + while (!Unsafe.IsAddressGreaterThan(ref currentRightSearchSpace, ref oneVectorAwayFromRightEnd)); + + // If any elements remain, process the last vector in the search space. + if (length % (uint)Vector512.Count != 0) + { + leftValues = TLoader.Load512(ref oneVectorAwayFromLeftEnd); + rightValues = Vector512.LoadUnsafe(ref oneVectorAwayFromRightEnd); + + if (!AllCharsInVectorAreAscii(leftValues | rightValues)) + { + return false; + } + + Vector512 notEquals = ~Vector512.Equals(leftValues, rightValues); + + if (notEquals != Vector512.Zero) + { + // not exact match + + leftValues |= loweringMask; + rightValues |= loweringMask; + + if (Vector512.GreaterThanAny((leftValues - vecA) & notEquals, vecZMinusA) || leftValues != rightValues) + { + return false; // first input isn't in [A-Za-z], and not exact match of lowered + } + } + } + } else if (Avx.IsSupported && length >= (uint)Vector256.Count) { ref TLeft currentLeftSearchSpace = ref left; From 6abe1ac52f08af279d83bcf2bce8d18707d5aaf9 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 6 Jul 2023 13:58:12 -0700 Subject: [PATCH 05/14] Using intrinsics in AllCharsInVectorAreAscii --- .../src/System/Text/Ascii.Utility.cs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs index 7682bf2b32a78c..849ae0e122ca0b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs @@ -1508,11 +1508,15 @@ private static bool AllCharsInVectorAreAscii(Vector512 vector) if (typeof(T) == typeof(byte)) { - return vector.AsByte().ExtractMostSignificantBits() == 0; + return + Avx512F.IsSupported ? Vector512.EqualsAll(Vector512.BitwiseAnd(vector.AsByte(), Vector512.Create((byte)0x80)), Vector512.Zero) : + vector.AsByte().ExtractMostSignificantBits() == 0; } else { - return ((vector.AsUInt16() & Vector512.Create((ushort)0xFF80)) == Vector512.Zero); + return + Avx512F.IsSupported ? Vector512.EqualsAll(Vector512.BitwiseAnd(vector.AsUInt16(), Vector512.Create((ushort)0xFF80)), Vector512.Zero) : + ((vector.AsUInt16() & Vector512.Create((ushort)0xFF80)) == Vector512.Zero); } } From dd39889a1bbfaaa6c46eb7923ed457499e19ff9c Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 6 Jul 2023 14:03:34 -0700 Subject: [PATCH 06/14] Using intrinsics in AllCharsInVectorAreAscii --- .../src/System/Text/Ascii.Utility.cs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs index 849ae0e122ca0b..16c52fdcc7b27b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs @@ -1508,15 +1508,11 @@ private static bool AllCharsInVectorAreAscii(Vector512 vector) if (typeof(T) == typeof(byte)) { - return - Avx512F.IsSupported ? Vector512.EqualsAll(Vector512.BitwiseAnd(vector.AsByte(), Vector512.Create((byte)0x80)), Vector512.Zero) : - vector.AsByte().ExtractMostSignificantBits() == 0; + return vector.AsByte().ExtractMostSignificantBits() == 0; } else { - return - Avx512F.IsSupported ? Vector512.EqualsAll(Vector512.BitwiseAnd(vector.AsUInt16(), Vector512.Create((ushort)0xFF80)), Vector512.Zero) : - ((vector.AsUInt16() & Vector512.Create((ushort)0xFF80)) == Vector512.Zero); + return (vector.AsUInt16() & Vector512.Create((ushort)0xFF80)) == Vector512.Zero; } } From ce35528f9dccb59c7ab0cd29bd4420966a522733 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 6 Jul 2023 14:29:25 -0700 Subject: [PATCH 07/14] Removing check for AVX512F and adding a check for Vector512 because the library is not using any functions from AVX512F --- .../System.Private.CoreLib/src/System/Text/Ascii.Equality.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index 92785a8483b3f5..2833231c7ee584 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -236,7 +236,7 @@ private static bool EqualsIgnoreCase(ref TLeft left, ref } } } - else if (Avx512F.IsSupported && length >= (uint)Vector512.Count) + else if (Vector512.IsHardwareAccelerated && length >= (uint)Vector512.Count) { ref TLeft currentLeftSearchSpace = ref left; ref TLeft oneVectorAwayFromLeftEnd = ref Unsafe.Add(ref currentLeftSearchSpace, length - TLoader.Count512); From 75c33c20ada319c6ffed196731a33f4a9bbf03ab Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 6 Jul 2023 14:37:34 -0700 Subject: [PATCH 08/14] Removing check for CompExactlyDependsOn(AVX512F) from AllCharsInVectorAreAscii for Vector 512. Also checking for Vector512 support and not AVX512F in ASCIIEquality.Equals --- .../System.Private.CoreLib/src/System/Text/Ascii.Equality.cs | 2 +- .../System.Private.CoreLib/src/System/Text/Ascii.Utility.cs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index 2833231c7ee584..0d8277db9fc88b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -61,7 +61,7 @@ private static bool Equals(ref TLeft left, ref TRight ri } } } - else if (Avx512F.IsSupported && length >= (uint)Vector512.Count) + else if (Vector512.IsHardwareAccelerated && length >= (uint)Vector512.Count) { ref TLeft currentLeftSearchSpace = ref left; ref TLeft oneVectorAwayFromLeftEnd = ref Unsafe.Add(ref currentLeftSearchSpace, length - TLoader.Count512); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs index 16c52fdcc7b27b..5dde2bc247ce6a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Utility.cs @@ -1500,7 +1500,6 @@ private static bool AllCharsInVectorAreAscii(Vector256 vector) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512F))] private static bool AllCharsInVectorAreAscii(Vector512 vector) where T : unmanaged { From 6c600cf21758a67069be3bc688f4c8cdaec8fd54 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 11 Jul 2023 02:30:50 -0700 Subject: [PATCH 09/14] Correcting the Tloader.Count512 for ushort --- .../System.Private.CoreLib/src/System/Text/Ascii.Equality.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index 0d8277db9fc88b..3cf6c5b2a73d65 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -500,7 +500,7 @@ public static bool EqualAndAscii(ref T left, ref T right) { public static nuint Count128 => sizeof(long); public static nuint Count256 => (uint)Vector128.Count; - public static nuint Count512 => (uint)Vector512.Count; + public static nuint Count512 => (uint)Vector256.Count; [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Load128(ref byte ptr) From f1930ddca3b4d27ebb6aa50de3124f93f0f17127 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 11 Jul 2023 07:02:03 -0700 Subject: [PATCH 10/14] resolving merge errors --- .../System.Private.CoreLib/src/System/Text/Ascii.Equality.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index 3cf6c5b2a73d65..c8297f0a1e318c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -99,7 +99,7 @@ private static bool Equals(ref TLeft left, ref TRight ri } } } - else if (Avx.IsSupported && length >= (uint)Vector256.Count) + else if (Avx.IsSupported && length >= (uint)Vector256.Count) { ref TLeft currentLeftSearchSpace = ref left; ref TRight currentRightSearchSpace = ref right; From 6e019406bb7a39ed81c8b723f2da123748d921bd Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 11 Jul 2023 15:23:25 -0700 Subject: [PATCH 11/14] Adding TLoader method for Vector512 for EqualAndAscii --- .../src/System/Text/Ascii.Equality.cs | 91 +++++++++++++------ 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index c8297f0a1e318c..bdf049230b2943 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -61,42 +61,34 @@ private static bool Equals(ref TLeft left, ref TRight ri } } } - else if (Vector512.IsHardwareAccelerated && length >= (uint)Vector512.Count) + else if (Vector512.IsHardwareAccelerated && length >= (uint)Vector512.Count) { ref TLeft currentLeftSearchSpace = ref left; - ref TLeft oneVectorAwayFromLeftEnd = ref Unsafe.Add(ref currentLeftSearchSpace, length - TLoader.Count512); ref TRight currentRightSearchSpace = ref right; - ref TRight oneVectorAwayFromRightEnd = ref Unsafe.Add(ref currentRightSearchSpace, length - (uint)Vector512.Count); - - Vector512 leftValues; - Vector512 rightValues; + // Add Vector512.Count because TLeft == TRight + // Or we are in the Widen case where we iterate 2 * TRight.Count which is the same as TLeft.Count + Debug.Assert(Vector512.Count == Vector512.Count + || (typeof(TLoader) == typeof(WideningLoader) && Vector512.Count == Vector512.Count * 2)); + ref TRight oneVectorAwayFromRightEnd = ref Unsafe.Add(ref currentRightSearchSpace, length - (uint)Vector512.Count); // Loop until either we've finished all elements or there's less than a vector's-worth remaining. do { - leftValues = TLoader.Load512(ref currentLeftSearchSpace); - rightValues = Vector512.LoadUnsafe(ref currentRightSearchSpace); - - if (leftValues != rightValues || !AllCharsInVectorAreAscii(leftValues | rightValues)) + if (!TLoader.EqualAndAscii512(ref currentLeftSearchSpace, ref currentRightSearchSpace)) { return false; } - currentRightSearchSpace = ref Unsafe.Add(ref currentRightSearchSpace, Vector512.Count); - currentLeftSearchSpace = ref Unsafe.Add(ref currentLeftSearchSpace, TLoader.Count512); + currentRightSearchSpace = ref Unsafe.Add(ref currentRightSearchSpace, Vector512.Count); + currentLeftSearchSpace = ref Unsafe.Add(ref currentLeftSearchSpace, Vector512.Count); } while (!Unsafe.IsAddressGreaterThan(ref currentRightSearchSpace, ref oneVectorAwayFromRightEnd)); // If any elements remain, process the last vector in the search space. - if (length % (uint)Vector512.Count != 0) + if (length % (uint)Vector512.Count != 0) { - leftValues = TLoader.Load512(ref oneVectorAwayFromLeftEnd); - rightValues = Vector512.LoadUnsafe(ref oneVectorAwayFromRightEnd); - - if (leftValues != rightValues || !AllCharsInVectorAreAscii(leftValues | rightValues)) - { - return false; - } + ref TLeft oneVectorAwayFromLeftEnd = ref Unsafe.Add(ref left, length - (uint)Vector512.Count); + return TLoader.EqualAndAscii512(ref oneVectorAwayFromLeftEnd, ref oneVectorAwayFromRightEnd); } } else if (Avx.IsSupported && length >= (uint)Vector256.Count) @@ -112,7 +104,7 @@ private static bool Equals(ref TLeft left, ref TRight ri // Loop until either we've finished all elements or there's less than a vector's-worth remaining. do { - if (!TLoader.EqualAndAscii(ref currentLeftSearchSpace, ref currentRightSearchSpace)) + if (!TLoader.EqualAndAscii256(ref currentLeftSearchSpace, ref currentRightSearchSpace)) { return false; } @@ -126,7 +118,7 @@ private static bool Equals(ref TLeft left, ref TRight ri if (length % (uint)Vector256.Count != 0) { ref TLeft oneVectorAwayFromLeftEnd = ref Unsafe.Add(ref left, length - (uint)Vector256.Count); - return TLoader.EqualAndAscii(ref oneVectorAwayFromLeftEnd, ref oneVectorAwayFromRightEnd); + return TLoader.EqualAndAscii256(ref oneVectorAwayFromLeftEnd, ref oneVectorAwayFromRightEnd); } } else @@ -255,7 +247,6 @@ private static bool EqualsIgnoreCase(ref TLeft left, ref { leftValues = TLoader.Load512(ref currentLeftSearchSpace); rightValues = Vector512.LoadUnsafe(ref currentRightSearchSpace); - if (!AllCharsInVectorAreAscii(leftValues | rightValues)) { return false; @@ -467,7 +458,8 @@ private interface ILoader static abstract Vector128 Load128(ref TLeft ptr); static abstract Vector256 Load256(ref TLeft ptr); static abstract Vector512 Load512(ref TLeft ptr); - static abstract bool EqualAndAscii(ref TLeft left, ref TRight right); + static abstract bool EqualAndAscii256(ref TLeft left, ref TRight right); + static abstract bool EqualAndAscii512(ref TLeft left, ref TRight right); } private readonly struct PlainLoader : ILoader where T : unmanaged, INumberBase @@ -477,10 +469,11 @@ private interface ILoader public static nuint Count512 => (uint)Vector512.Count; public static Vector128 Load128(ref T ptr) => Vector128.LoadUnsafe(ref ptr); public static Vector256 Load256(ref T ptr) => Vector256.LoadUnsafe(ref ptr); + public static Vector512 Load512(ref T ptr) => Vector512.LoadUnsafe(ref ptr); [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Avx))] - public static bool EqualAndAscii(ref T left, ref T right) + public static bool EqualAndAscii256(ref T left, ref T right) { Vector256 leftValues = Vector256.LoadUnsafe(ref left); Vector256 rightValues = Vector256.LoadUnsafe(ref right); @@ -493,7 +486,19 @@ public static bool EqualAndAscii(ref T left, ref T right) return true; } - public static Vector512 Load512(ref T ptr) => Vector512.LoadUnsafe(ref ptr); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool EqualAndAscii512(ref T left, ref T right) + { + Vector512 leftValues = Vector512.LoadUnsafe(ref left); + Vector512 rightValues = Vector512.LoadUnsafe(ref right); + + if (leftValues != rightValues || !AllCharsInVectorAreAscii(leftValues)) + { + return false; + } + + return true; + } } private readonly struct WideningLoader : ILoader @@ -528,9 +533,16 @@ public static Vector256 Load256(ref byte ptr) return Vector256.Create(lower, upper); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Load512(ref byte ptr) + { + (Vector256 lower, Vector256 upper) = Vector256.Widen(Vector256.LoadUnsafe(ref ptr)); + return Vector512.Create(lower, upper); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Avx))] - public static bool EqualAndAscii(ref byte utf8, ref ushort utf16) + public static bool EqualAndAscii256(ref byte utf8, ref ushort utf16) { // We widen the utf8 param so we can compare it to utf16, this doubles how much of the utf16 vector we search Debug.Assert(Vector256.Count == Vector256.Count * 2); @@ -554,10 +566,29 @@ public static bool EqualAndAscii(ref byte utf8, ref ushort utf16) return true; } - public static Vector512 Load512(ref byte ptr) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool EqualAndAscii512(ref byte utf8, ref ushort utf16) { - (Vector256 lower, Vector256 upper) = Vector256.Widen(Vector256.LoadUnsafe(ref ptr)); - return Vector512.Create(lower, upper); + // We widen the utf8 param so we can compare it to utf16, this doubles how much of the utf16 vector we search + Debug.Assert(Vector512.Count == Vector512.Count * 2); + + Vector512 leftNotWidened = Vector512.LoadUnsafe(ref utf8); + if (!AllCharsInVectorAreAscii(leftNotWidened)) + { + return false; + } + + (Vector512 leftLower, Vector512 leftUpper) = Vector512.Widen(leftNotWidened); + Vector512 right = Vector512.LoadUnsafe(ref utf16); + Vector512 rightNext = Vector512.LoadUnsafe(ref utf16, (uint)Vector512.Count); + + // A branchless version of "leftLower != right || leftUpper != rightNext" + if (((leftLower ^ right) | (leftUpper ^ rightNext)) != Vector512.Zero) + { + return false; + } + + return true; } } } From f56f612d1b7640c70fa694445f58025f51ef84df Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 12 Jul 2023 21:50:51 -0700 Subject: [PATCH 12/14] Updating Load512 for WideningLoader for performance increase --- .../src/System/Text/Ascii.Equality.cs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index bdf049230b2943..93f8b4d190fa61 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -536,8 +536,19 @@ public static Vector256 Load256(ref byte ptr) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Load512(ref byte ptr) { - (Vector256 lower, Vector256 upper) = Vector256.Widen(Vector256.LoadUnsafe(ref ptr)); - return Vector512.Create(lower, upper); + // This is done here for performance gain. + // A similar implementation would be as below: + // + // (Vector256 lower, Vector256 upper) = Vector256.Widen(Vector256.LoadUnsafe(ref ptr)); + // return Vector512.Create(lower, upper); + // + // This is similar to what is done for Load256 here. But + // for Vector512, this implementation is low performance + // since a load and widen on Vector256 followed by a + // create on Vector512 is leading to a performance lower + // than that of similar implementationfor Vector256. + (Vector512 lower, Vector512 _) = Vector512.Widen(Vector256.LoadUnsafe(ref ptr).ToVector512()); + return lower; } [MethodImpl(MethodImplOptions.AggressiveInlining)] From a995aae3d50d8899a20e58b9cda3149225c60044 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 13 Jul 2023 14:32:13 -0700 Subject: [PATCH 13/14] addressing review comments --- .../src/System/Text/Ascii.Equality.cs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index 93f8b4d190fa61..036dae8c89b7e4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -536,17 +536,6 @@ public static Vector256 Load256(ref byte ptr) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Load512(ref byte ptr) { - // This is done here for performance gain. - // A similar implementation would be as below: - // - // (Vector256 lower, Vector256 upper) = Vector256.Widen(Vector256.LoadUnsafe(ref ptr)); - // return Vector512.Create(lower, upper); - // - // This is similar to what is done for Load256 here. But - // for Vector512, this implementation is low performance - // since a load and widen on Vector256 followed by a - // create on Vector512 is leading to a performance lower - // than that of similar implementationfor Vector256. (Vector512 lower, Vector512 _) = Vector512.Widen(Vector256.LoadUnsafe(ref ptr).ToVector512()); return lower; } From 9e4c0058b44b1647f39918c40411acca8ad17c79 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 18 Jul 2023 10:59:46 -0700 Subject: [PATCH 14/14] Addressing review changes. Changing Widen to WidenLower for Load512 --- .../System.Private.CoreLib/src/System/Text/Ascii.Equality.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs index 036dae8c89b7e4..038b042205c352 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Equality.cs @@ -536,8 +536,7 @@ public static Vector256 Load256(ref byte ptr) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Load512(ref byte ptr) { - (Vector512 lower, Vector512 _) = Vector512.Widen(Vector256.LoadUnsafe(ref ptr).ToVector512()); - return lower; + return Vector512.WidenLower(Vector256.LoadUnsafe(ref ptr).ToVector512()); } [MethodImpl(MethodImplOptions.AggressiveInlining)]