Skip to content

Commit

Permalink
Format/Parse binary from/to BigInteger (#85392)
Browse files Browse the repository at this point in the history
* Initial draft commit: add FormatBigIntegerToBin().

* Fix comment: use '?:' to assign the ValueStringBuilder variable, making it 'unsafe to return' so that a stackalloc'ed buffer can be assigned to it.

* Refine FormatBigIntegerToBin(); and consider chars overflow scenario.

* Update Format code for final binary format definition.

* Refine FormatBigIntegerToBin().

* consider case where output is span

* Switch to try..finally to return the rented array to the array pool.

* Initial add method BinNumberToBigInteger().

* Update FormatProvider.Number.cs to support AllowBinarySpecifier.

* Use BinNumberToBigInteger().

* Add tests of Format.

* Add tests of Parse().

* Improve Format(): use ValueStringBuilder just as a wrapper for the destination span, saving the extra buffer allocation and copy inside ValueStringBuilder.

* Fix comment: use ch == '0' || ch == '1'

* Fix comment: refactor ParseNumber() to extract common abstract operations for previous Hex and new Binary.

* Fix comment: refine naming; make BinNumberToBigInteger() follow the same general pattern as HexNumberToBigInteger().

* Fix comment: use internal 'kcbitUint'.

* Fix comment: rename 'Bin' method names to 'Binary' ones; remove unnecessary Slice().
  • Loading branch information
lateapexearlyspeed authored Oct 12, 2023
1 parent 529f60e commit 71d6476
Show file tree
Hide file tree
Showing 5 changed files with 521 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,54 @@ private static unsafe bool AllowHyphenDuringParsing(NumberFormatInfo info)
return ret;
}

// Compile-time strategy used as the TDigitParser type argument of ParseNumberStyle, so the
// shared parsing loop can be specialized per digit set (decimal, hex, binary).
private interface IDigitParser
{
// Returns true when c is accepted as a digit by this parser.
static abstract bool IsValidChar(char c);
// Returns true for the hex and binary parsers and false for the decimal one; the parsing loop
// consults it when deciding whether leading '0' digits of a big number are kept.
static abstract bool IsHexOrBinaryParser();
}

// Digit parser for plain decimal input: only the ASCII digits '0' through '9' are accepted.
private readonly struct IntegerDigitParser : IDigitParser
{
    public static bool IsValidChar(char c)
    {
        return c is >= '0' and <= '9';
    }

    public static bool IsHexOrBinaryParser()
    {
        return false;
    }
}

// Digit parser for hexadecimal input: digit validity is delegated to HexConverter.IsHexChar.
private readonly struct HexDigitParser : IDigitParser
{
    public static bool IsValidChar(char c)
    {
        int codeUnit = (int)c;
        return HexConverter.IsHexChar(codeUnit);
    }

    public static bool IsHexOrBinaryParser()
    {
        return true;
    }
}

// Digit parser for binary input: only the characters '0' and '1' are accepted.
private readonly struct BinaryDigitParser : IDigitParser
{
    public static bool IsValidChar(char c) => c is '0' or '1';

    public static bool IsHexOrBinaryParser()
    {
        return true;
    }
}


// Entry point for number parsing: picks the digit parser that matches the requested style and
// forwards every argument unchanged to ParseNumberStyle. AllowHexSpecifier takes precedence over
// AllowBinarySpecifier; when neither flag is set the decimal parser is used.
private static unsafe bool ParseNumber(ref char* str, char* strEnd, NumberStyles options, scoped ref NumberBuffer number, StringBuilder? sb, NumberFormatInfo numfmt, bool parseDecimal)
{
    bool wantsHex = (options & NumberStyles.AllowHexSpecifier) != NumberStyles.None;
    bool wantsBinary = (options & NumberStyles.AllowBinarySpecifier) != NumberStyles.None;

    if (!wantsHex && !wantsBinary)
    {
        return ParseNumberStyle<IntegerDigitParser>(ref str, strEnd, options, ref number, sb, numfmt, parseDecimal);
    }

    // Hex is checked first so that it wins if both flags happen to be present.
    return wantsHex
        ? ParseNumberStyle<HexDigitParser>(ref str, strEnd, options, ref number, sb, numfmt, parseDecimal)
        : ParseNumberStyle<BinaryDigitParser>(ref str, strEnd, options, ref number, sb, numfmt, parseDecimal);
}

private static unsafe bool ParseNumberStyle<TDigitParser>(ref char* str, char* strEnd, NumberStyles options, scoped ref NumberBuffer number, StringBuilder? sb, NumberFormatInfo numfmt, bool parseDecimal)
where TDigitParser : struct, IDigitParser
{
Debug.Assert(str != null);
Debug.Assert(strEnd != null);
Expand Down Expand Up @@ -440,11 +487,11 @@ private static unsafe bool ParseNumber(ref char* str, char* strEnd, NumberStyles
int digEnd = 0;
while (true)
{
if (char.IsAsciiDigit(ch) || (((options & NumberStyles.AllowHexSpecifier) != 0) && char.IsBetween((char)(ch | 0x20), 'a', 'f')))
if (TDigitParser.IsValidChar(ch))
{
state |= StateDigits;

if (ch != '0' || (state & StateNonZero) != 0 || (bigNumber && ((options & NumberStyles.AllowHexSpecifier) != 0)))
if (ch != '0' || (state & StateNonZero) != 0 || (bigNumber && TDigitParser.IsHexOrBinaryParser()))
{
if (digCount < maxParseDigits)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ public readonly struct BigInteger
IBinaryInteger<BigInteger>,
ISignedNumber<BigInteger>
{
private const uint kuMaskHighBit = unchecked((uint)int.MinValue);
private const int kcbitUint = 32;
private const int kcbitUlong = 64;
private const int DecimalScaleFactorMask = 0x00FF0000;
internal const uint kuMaskHighBit = unchecked((uint)int.MinValue);
internal const int kcbitUint = 32;
internal const int kcbitUlong = 64;
internal const int DecimalScaleFactorMask = 0x00FF0000;

// For values int.MinValue < n <= int.MaxValue, the value is stored in sign
// and _bits is null. For all other values, sign is +1 or -1 and the bits are in _bits
Expand Down
220 changes: 216 additions & 4 deletions src/libraries/System.Runtime.Numerics/src/System/Numerics/BigNumber.cs
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ internal static class BigNumber
| NumberStyles.AllowLeadingSign | NumberStyles.AllowTrailingSign
| NumberStyles.AllowParentheses | NumberStyles.AllowDecimalPoint
| NumberStyles.AllowThousands | NumberStyles.AllowExponent
| NumberStyles.AllowCurrencySymbol | NumberStyles.AllowHexSpecifier);
| NumberStyles.AllowCurrencySymbol | NumberStyles.AllowHexSpecifier
| NumberStyles.AllowBinarySpecifier);

private static ReadOnlySpan<uint> UInt32PowersOfTen => [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000];

Expand Down Expand Up @@ -371,10 +372,13 @@ internal static ParsingStatus TryParseBigInteger(ReadOnlySpan<char> value, Numbe
{
return HexNumberToBigInteger(ref bigNumber, out result);
}
else

if ((style & NumberStyles.AllowBinarySpecifier) != 0)
{
return NumberToBigInteger(ref bigNumber, out result);
return BinaryNumberToBigInteger(ref bigNumber, out result);
}

return NumberToBigInteger(ref bigNumber, out result);
}

internal static BigInteger ParseBigInteger(string value, NumberStyles style, NumberFormatInfo info)
Expand Down Expand Up @@ -505,6 +509,112 @@ private static ParsingStatus HexNumberToBigInteger(ref BigNumberBuffer number, o
}
}

// Converts the binary digits held in number.digits (a run of '0'/'1' characters followed by a
// trailing '\0') into a BigInteger.
//
// The digits are read as a two's-complement value: the first digit is the sign bit, so a leading
// '1' yields a negative result. Digits are packed most-significant-first into 32-bit blocks that
// are stored least-significant-block-first in a temporary buffer (stack memory for small inputs,
// an ArrayPool rental otherwise; the rental is always returned in the finally block).
//
// Returns ParsingStatus.Failed (with result = default) when there are no digits at all,
// otherwise ParsingStatus.OK.
private static ParsingStatus BinaryNumberToBigInteger(ref BigNumberBuffer number, out BigInteger result)
{
    if (number.digits is null || number.digits.Length == 0)
    {
        result = default;
        return ParsingStatus.Failed;
    }

    int totalDigitCount = number.digits.Length - 1; // Ignore trailing '\0'
    int partialDigitCount;

    (int blockCount, int remainder) = int.DivRem(totalDigitCount, BigInteger.kcbitUint);
    if (remainder == 0)
    {
        partialDigitCount = 0;
    }
    else
    {
        // The most significant block is only partially covered by digits. Pretend the missing
        // high bits were already consumed; they come from the sign-extended seed value below.
        blockCount++;
        partialDigitCount = BigInteger.kcbitUint - remainder;
    }

    Debug.Assert(number.digits[0] is '0' or '1');
    bool isNegative = number.digits[0] == '1';

    // Seed with all ones for negative input so that the partial top block is sign-extended.
    uint currentBlock = isNegative ? 0xFF_FF_FF_FFu : 0x0;

    uint[]? arrayFromPool = null;
    Span<uint> buffer = ((uint)blockCount <= BigIntegerCalculator.StackAllocThreshold
        ? stackalloc uint[BigIntegerCalculator.StackAllocThreshold]
        : arrayFromPool = ArrayPool<uint>.Shared.Rent(blockCount)).Slice(0, blockCount);

    // Digits arrive most significant first, so blocks are written from the end of the buffer.
    int bufferPos = blockCount - 1;

    try
    {
        foreach (ReadOnlyMemory<char> digitsChunkMem in number.digits.GetChunks())
        {
            ReadOnlySpan<char> chunkDigits = digitsChunkMem.Span;
            for (int i = 0; i < chunkDigits.Length; i++)
            {
                char digitChar = chunkDigits[i];
                if (digitChar == '\0')
                {
                    break;
                }

                Debug.Assert(digitChar is '0' or '1');
                currentBlock = (currentBlock << 1) | (uint)(digitChar - '0');
                partialDigitCount++;

                if (partialDigitCount == BigInteger.kcbitUint)
                {
                    buffer[bufferPos--] = currentBlock;
                    partialDigitCount = 0;

                    // we do not need to reset currentBlock now, because it should always set all its bits by left shift in subsequent iterations
                }
            }
        }

        Debug.Assert(partialDigitCount == 0 && bufferPos == -1);

        // Drop zero high blocks (redundant leading '0' digits of a non-negative value).
        buffer = buffer.TrimEnd(0u);

        int sign;
        uint[]? bits;

        if (buffer.IsEmpty)
        {
            sign = 0;
            bits = null;
        }
        else if (buffer.Length == 1)
        {
            // A single block is already the two's-complement int representation of the value.
            sign = (int)buffer[0];
            bits = null;

            // Values that do not fit the compact form (a positive value with the top bit set, or
            // int.MinValue) must be stored as sign + magnitude instead.
            if ((!isNegative && sign < 0) || sign == int.MinValue)
            {
                bits = new[] { (uint)sign };
                sign = isNegative ? -1 : 1;
            }
        }
        else
        {
            sign = isNegative ? -1 : 1;
            bits = buffer.ToArray();

            if (isNegative)
            {
                // Turn the two's-complement blocks into a magnitude.
                NumericsHelpers.DangerousMakeTwosComplement(bits);

                // Sign-extended input (more leading '1' digits than needed, e.g. 64 ones for -1)
                // leaves zero high blocks after negation. Strip them and, when the magnitude fits,
                // fall back to the compact form so the result is normalized like any other
                // BigInteger of the same value.
                ReadOnlySpan<uint> magnitude = bits.AsSpan().TrimEnd(0u);
                Debug.Assert(!magnitude.IsEmpty);

                if (magnitude.Length == 1 && magnitude[0] <= int.MaxValue)
                {
                    sign = -(int)magnitude[0];
                    bits = null;
                }
                else if (magnitude.Length < bits.Length)
                {
                    bits = magnitude.ToArray();
                }
            }
        }

        result = new BigInteger(sign, bits);
        return ParsingStatus.OK;
    }
    finally
    {
        if (arrayFromPool is not null)
        {
            ArrayPool<uint>.Shared.Return(arrayFromPool);
        }
    }
}

//
// This threshold is for choosing the algorithm to use based on the number of digits.
//
Expand Down Expand Up @@ -996,6 +1106,105 @@ internal static char ParseFormatSpecifier(ReadOnlySpan<char> format, out int dig
}
}

// Formats value as binary text ('0'/'1' characters, most significant bit first). The output
// always starts with a sign bit ('0' for non-negative, '1' for negative) and is left-padded with
// that same bit up to 'digits' characters.
//
// targetSpan == true : writes into destination; returns null and reports the outcome through
//                      charsWritten / spanSuccess (spanSuccess is false when destination is too small).
// targetSpan == false: returns the formatted string; charsWritten is 0 and spanSuccess is false.
//
// Throws FormatException when the required character count exceeds Array.MaxLength.
private static string? FormatBigIntegerToBinary(bool targetSpan, BigInteger value, int digits, Span<char> destination, out int charsWritten, out bool spanSuccess)
{
    // Get the bytes that make up the BigInteger, renting from the pool only when the stack buffer is too small.
    byte[]? rentedBytes = null;
    Span<byte> bytes = stackalloc byte[64]; // arbitrary threshold
    if (!value.TryWriteOrCountBytes(bytes, out int byteCount))
    {
        rentedBytes = ArrayPool<byte>.Shared.Rent(byteCount);
        bytes = rentedBytes;
        bool written = value.TryWriteBytes(bytes, out _);
        Debug.Assert(written);
    }
    bytes = bytes.Slice(0, byteCount);

    Debug.Assert(!bytes.IsEmpty);

    bool isNegative = value._sign < 0;
    byte highByte = bytes[^1];

    // Significant bits of the top byte plus one sign bit; for negative values the run of
    // leading ones is what gets skipped, hence the complement.
    byte signNormalizedHighByte = isNegative ? (byte)~highByte : highByte;
    int charsInHighByte = 9 - byte.LeadingZeroCount(signNormalizedHighByte);
    long requiredCharCount = (bytes.Length - 1) * 8L + charsInHighByte;

    if (requiredCharCount > Array.MaxLength)
    {
        Debug.Assert(rentedBytes is not null);
        ArrayPool<byte>.Shared.Return(rentedBytes);

        throw new FormatException(SR.Format_TooLarge);
    }

    int charsForBits = (int)requiredCharCount;

    Debug.Assert(digits < Array.MaxLength);
    int totalChars = Math.Max(digits, charsForBits);

    try
    {
        scoped ValueStringBuilder sb;
        if (!targetSpan)
        {
            // each byte is typically eight chars
            sb = totalChars > 512
                ? new ValueStringBuilder(totalChars)
                : new ValueStringBuilder(stackalloc char[512]);
        }
        else
        {
            if (totalChars > destination.Length)
            {
                charsWritten = 0;
                spanSuccess = false;
                return null;
            }

            // destination is known to be large enough, so the builder is only a thin wrapper over it:
            // both flows share the formatting logic below and the span flow needs no final copy.
            sb = new ValueStringBuilder(destination);
        }

        // Left-pad with the sign bit up to the requested precision.
        int padCount = totalChars - charsForBits;
        if (padCount > 0)
        {
            sb.Append(isNegative ? '1' : '0', padCount);
        }

        // Top byte: only its significant bits (plus sign bit); remaining bytes: all eight bits each.
        AppendBits(ref sb, highByte, charsInHighByte - 1);

        ReadOnlySpan<byte> lowerBytes = bytes.Slice(0, bytes.Length - 1);
        for (int index = lowerBytes.Length - 1; index >= 0; index--)
        {
            AppendBits(ref sb, lowerBytes[index], 7);
        }

        Debug.Assert(sb.Length == totalChars);

        if (!targetSpan)
        {
            charsWritten = 0;
            spanSuccess = false;
            return sb.ToString();
        }

        charsWritten = totalChars;
        spanSuccess = true;
        return null;
    }
    finally
    {
        if (rentedBytes is not null)
        {
            ArrayPool<byte>.Shared.Return(rentedBytes);
        }
    }

    // Appends bits [topBit..0] of b, most significant first, as '0'/'1' characters.
    static void AppendBits(ref ValueStringBuilder sb, byte b, int topBit)
    {
        int bit = topBit;
        while (bit >= 0)
        {
            sb.Append(((b >> bit) & 0x1) == 0 ? '0' : '1');
            bit--;
        }
    }
}

internal static string FormatBigInteger(BigInteger value, string? format, NumberFormatInfo info)
{
return FormatBigInteger(targetSpan: false, value, format, format, info, default, out _, out _)!;
Expand All @@ -1020,7 +1229,10 @@ internal static bool TryFormatBigInteger(BigInteger value, ReadOnlySpan<char> fo
{
return FormatBigIntegerToHex(targetSpan, value, fmt, digits, info, destination, out charsWritten, out spanSuccess);
}

if (fmt == 'b' || fmt == 'B')
{
return FormatBigIntegerToBinary(targetSpan, value, digits, destination, out charsWritten, out spanSuccess);
}

if (value._bits == null)
{
Expand Down
Loading

0 comments on commit 71d6476

Please sign in to comment.