Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests for exotic external tar asset archives, fix some more corner case bugs #74412

Merged
merged 10 commits into from
Aug 23, 2022
Merged
3 changes: 3 additions & 0 deletions src/libraries/System.Formats.Tar/src/Resources/Strings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@
<value>The entry is a symbolic link or a hard link but the LinkName field is null or empty.</value>
</data>
<data name="TarEntryTypeNotSupported" xml:space="preserve">
<value>Entry type '{0}' not supported.</value>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<value>Entry type '{0}' not supported.</value>
<value>Entry type '{0}' is not supported.</value>

And below

</data>
<data name="TarEntryTypeNotSupportedInFormat" xml:space="preserve">
<value>Entry type '{0}' not supported in format '{1}'.</value>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<value>Entry type '{0}' not supported in format '{1}'.</value>
<value>Entry type '{0}' is not supported in format '{1}'.</value>

</data>
<data name="TarEntryTypeNotSupportedForExtracting" xml:space="preserve">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ internal sealed partial class TarHeader
// Attempts to retrieve the next header from the specified tar archive stream.
// Throws if end of stream is reached or if any data type conversion fails.
// Returns a valid TarHeader object if the attributes were read successfully, null otherwise.
internal static TarHeader? TryGetNextHeader(Stream archiveStream, bool copyData, TarEntryFormat initialFormat)
internal static TarHeader? TryGetNextHeader(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, bool processDataBlock)
{
// The four supported formats have a header that fits in the default record size
Span<byte> buffer = stackalloc byte[TarHelpers.RecordSize];

archiveStream.ReadExactly(buffer);

TarHeader? header = TryReadAttributes(initialFormat, buffer);
if (header != null)
if (header != null && processDataBlock)
Copy link
Member

@jozkee jozkee Aug 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this better than one extra param?

Suggested change
if (header != null && processDataBlock)
if (header != null && initialFormat != TarEntryFormat.Pax)

Copy link
Member Author

@carlossanlop carlossanlop Aug 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. That was my initial approach, but then I opted for the extra boolean for these reasons:

  • We have multiple types of PAX entries: Global Extended Attriburtes, Extended Attributes, and the normal entries. I was introducing regressions in some of those cases.
  • It's much more clear what the boolean is for, rather than a not-equal check.

{
header.ProcessDataBlock(archiveStream, copyData);
}
Expand All @@ -39,7 +39,7 @@ internal sealed partial class TarHeader
// Asynchronously attempts read all the fields of the next header.
// Throws if end of stream is reached or if any data type conversion fails.
// Returns true if all the attributes were read successfully, false otherwise.
internal static async ValueTask<TarHeader?> TryGetNextHeaderAsync(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, CancellationToken cancellationToken)
internal static async ValueTask<TarHeader?> TryGetNextHeaderAsync(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, bool processDataBlock, CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();

Expand All @@ -50,7 +50,7 @@ internal sealed partial class TarHeader
await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false);

TarHeader? header = TryReadAttributes(initialFormat, buffer.Span);
if (header != null)
if (header != null && processDataBlock)
{
await header.ProcessDataBlockAsync(archiveStream, copyData, cancellationToken).ConfigureAwait(false);
}
Expand Down Expand Up @@ -180,7 +180,7 @@ internal void ReplaceNormalAttributesWithExtended(Dictionary<string, string>? di
// will get all the data section read and the stream pointer positioned at the beginning of the next header.
// - Block, Character, Directory, Fifo, HardLink and SymbolicLink typeflag entries have no data section so the archive stream pointer will be positioned at the beginning of the next header.
// - All other typeflag entries with a data section will generate a stream wrapping the data section: SeekableSubReadStream for seekable archive streams, and SubReadStream for unseekable archive streams.
private void ProcessDataBlock(Stream archiveStream, bool copyData)
internal void ProcessDataBlock(Stream archiveStream, bool copyData)
{
bool skipBlockAlignmentPadding = true;

Expand All @@ -199,6 +199,10 @@ private void ProcessDataBlock(Stream archiveStream, bool copyData)
case TarEntryType.HardLink:
case TarEntryType.SymbolicLink:
// No data section
if (_size > 0)
{
throw new FormatException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag));
}
break;
case TarEntryType.RegularFile:
case TarEntryType.V7RegularFile: // Treated as regular file
Expand Down Expand Up @@ -257,6 +261,10 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
case TarEntryType.HardLink:
case TarEntryType.SymbolicLink:
// No data section
if (_size > 0)
{
throw new FormatException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag));
}
break;
case TarEntryType.RegularFile:
case TarEntryType.V7RegularFile: // Treated as regular file
Expand Down Expand Up @@ -311,6 +319,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
{
MemoryStream copiedData = new MemoryStream();
TarHelpers.CopyBytes(archiveStream, copiedData, _size);
// Reset position pointer so the user can do the first DataStream read from the beginning
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: slightly better wording (don't trigger CI for this suggestion)

Suggested change
// Reset position pointer so the user can do the first DataStream read from the beginning
// Reset stream position so the user can do the first DataStream read from the beginning

also: do other usages of TarHelpers.CopyBytes also need to reset the position? If so, we could move it to this method itself.

copiedData.Position = 0;
return copiedData;
}

Expand All @@ -336,6 +346,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
{
MemoryStream copiedData = new MemoryStream();
await TarHelpers.CopyBytesAsync(archiveStream, copiedData, size, cancellationToken).ConfigureAwait(false);
// Reset position pointer so the user can do the first DataStream read from the beginning
copiedData.Position = 0;
return copiedData;
}

Expand Down Expand Up @@ -396,12 +408,13 @@ TarEntryType.LongLink or
TarEntryType.LongPath or
TarEntryType.MultiVolume or
TarEntryType.RenamedOrSymlinked or
TarEntryType.SparseFile or
TarEntryType.TapeVolume => TarEntryFormat.Gnu,

// V7 is the only one that uses 'V7RegularFile'.
TarEntryType.V7RegularFile => TarEntryFormat.V7,

TarEntryType.SparseFile => throw new NotSupportedException(string.Format(SR.TarEntryTypeNotSupported, header._typeFlag)),

// We can quickly determine the *minimum* possible format if the entry type
// is the POSIX 'RegularFile', although later we could upgrade it to PAX or GNU
_ => (header._typeFlag == TarEntryType.RegularFile) ? TarEntryFormat.Ustar : TarEntryFormat.V7
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ internal static TarEntryType GetCorrectTypeFlagForFormat(TarEntryFormat format,
internal static T ParseOctal<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>
{
buffer = TrimEndingNullsAndSpaces(buffer);
buffer = TrimLeadingNullsAndSpaces(buffer);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: either {Ending,Starting} or {Trailing,Leading} pair

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine, most tools I have tried only ignore nulls and spaces. I haven't found a spec explicitly calling it out how lenient the implementation can or should be, but libarchive's behavior is that it ignores all non-octal characters.

Maybe add a comment linking to formal specification which talks about it, or if there is none:
"The formal specification does not specify it but we only trim nulls and spaces"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @am11. Sure, I can add a comment. If I don't get any blocking feedback to address, I'll add the comment in a separate PR to not block this with a CI re-run.


if (buffer.Length == 0)
{
return T.Zero;
}

T octalFactor = T.CreateTruncating(8u);
T value = T.Zero;
Expand Down Expand Up @@ -243,6 +249,17 @@ internal static ReadOnlySpan<byte> TrimEndingNullsAndSpaces(ReadOnlySpan<byte> b
return buffer.Slice(0, trimmedLength);
}

private static ReadOnlySpan<byte> TrimLeadingNullsAndSpaces(ReadOnlySpan<byte> buffer)
{
int newStart = 0;
while (newStart < buffer.Length && buffer[newStart] is 0 or 32)
{
newStart++;
}

return buffer.Slice(newStart);
}
Comment on lines +253 to +261
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: you can use the new IndexOfAnyExcept here

Suggested change
{
int newStart = 0;
while (newStart < buffer.Length && buffer[newStart] is 0 or 32)
{
newStart++;
}
return buffer.Slice(newStart);
}
=> buffer.Slice(Math.Max(0, buffer.IndexOfAnyExcept((byte)0, (byte)' '));

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it's a nit, I can address this in a separate PR.


// Returns the ASCII string contained in the specified buffer of bytes,
// removing the trailing null or space chars.
internal static string GetTrimmedAsciiString(ReadOnlySpan<byte> buffer) => GetTrimmedString(buffer, Encoding.ASCII);
Expand Down Expand Up @@ -351,7 +368,7 @@ TarEntryType.RegularFile or
throw new FormatException(string.Format(SR.TarInvalidFormat, archiveFormat));
}

throw new InvalidOperationException(string.Format(SR.TarEntryTypeNotSupported, entryType, archiveFormat));
throw new InvalidOperationException(string.Format(SR.TarEntryTypeNotSupportedInFormat, entryType, archiveFormat));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ public sealed class TarReader : IDisposable, IAsyncDisposable
private readonly bool _leaveOpen;
private TarEntry? _previouslyReadEntry;
private List<Stream>? _dataStreamsToDispose;
private bool _readFirstEntry;
private bool _reachedEndMarkers;

internal Stream _archiveStream;
Expand All @@ -44,7 +43,6 @@ public TarReader(Stream archiveStream, bool leaveOpen = false)

_previouslyReadEntry = null;
_isDisposed = false;
_readFirstEntry = false;
_reachedEndMarkers = false;
}

Expand Down Expand Up @@ -124,11 +122,6 @@ public async ValueTask DisposeAsync()
TarHeader? header = TryGetNextEntryHeader(copyData);
if (header != null)
{
if (!_readFirstEntry)
{
_readFirstEntry = true;
}

TarEntry entry = header._format switch
{
TarEntryFormat.Pax => header._typeFlag is TarEntryType.GlobalExtendedAttributes ?
Expand Down Expand Up @@ -282,11 +275,6 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
TarHeader? header = await TryGetNextEntryHeaderAsync(copyData, cancellationToken).ConfigureAwait(false);
if (header != null)
{
if (!_readFirstEntry)
{
_readFirstEntry = true;
}

TarEntry entry = header._format switch
{
TarEntryFormat.Pax => header._typeFlag is TarEntryType.GlobalExtendedAttributes ?
Expand Down Expand Up @@ -319,7 +307,7 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
{
Debug.Assert(!_reachedEndMarkers);

TarHeader? header = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Unknown);
TarHeader? header = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Unknown, processDataBlock: true);

if (header == null)
{
Expand Down Expand Up @@ -361,7 +349,7 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel

Debug.Assert(!_reachedEndMarkers);

TarHeader? header = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Unknown, cancellationToken).ConfigureAwait(false);
TarHeader? header = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Unknown, processDataBlock: true, cancellationToken).ConfigureAwait(false);
if (header == null)
{
return null;
Expand Down Expand Up @@ -397,9 +385,10 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
// and returns the actual entry with the processed extended attributes saved in the _extendedAttributes dictionary.
private bool TryProcessExtendedAttributesHeader(TarHeader extendedAttributesHeader, bool copyData, [NotNullWhen(returnValue: true)] out TarHeader? actualHeader)
{
actualHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Pax);
// Don't process the data block of the actual entry just yet, because there's a slim chance
// that the extended attributes contain a size that we need to override in the header
actualHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Pax, processDataBlock: false);

// Now get the actual entry
if (actualHeader == null)
{
return false;
Expand All @@ -417,6 +406,9 @@ TarEntryType.LongLink or
// Replace all the attributes representing standard fields with the extended ones, if any
actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes);

// We retrieved the extended attributes, now we can read the data, and always with the right size
actualHeader.ProcessDataBlock(_archiveStream, copyData);

return true;
}

Expand All @@ -426,8 +418,9 @@ TarEntryType.LongLink or
{
cancellationToken.ThrowIfCancellationRequested();

// Now get the actual entry
TarHeader? actualHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Pax, cancellationToken).ConfigureAwait(false);
// Don't process the data block of the actual entry just yet, because there's a slim chance
// that the extended attributes contain a size that we need to override in the header
TarHeader? actualHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Pax, processDataBlock: false, cancellationToken).ConfigureAwait(false);
if (actualHeader == null)
{
return null;
Expand All @@ -451,6 +444,9 @@ TarEntryType.LongLink or
// Replace all the attributes representing standard fields with the extended ones, if any
actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes);

// We retrieved the extended attributes, now we can read the data, and always with the right size
actualHeader.ProcessDataBlock(_archiveStream, copyData);

return actualHeader;
}

Expand All @@ -460,7 +456,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
{
finalHeader = new(TarEntryFormat.Gnu);

TarHeader? secondHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu);
TarHeader? secondHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true);

// Get the second entry, which is the actual entry
if (secondHeader == null)
Expand All @@ -478,7 +474,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
if ((header._typeFlag is TarEntryType.LongLink && secondHeader._typeFlag is TarEntryType.LongPath) ||
(header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink))
{
TarHeader? thirdHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu);
TarHeader? thirdHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true);

// Get the third entry, which is the actual entry
if (thirdHeader == null)
Expand Down Expand Up @@ -537,7 +533,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
cancellationToken.ThrowIfCancellationRequested();

// Get the second entry, which is the actual entry
TarHeader? secondHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, cancellationToken).ConfigureAwait(false);
TarHeader? secondHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true, cancellationToken).ConfigureAwait(false);
if (secondHeader == null)
{
return null;
Expand All @@ -556,7 +552,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
(header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink))
{
// Get the third entry, which is the actual entry
TarHeader? thirdHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, cancellationToken).ConfigureAwait(false);
TarHeader? thirdHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true, cancellationToken).ConfigureAwait(false);
if (thirdHeader == null)
{
return null;
Expand Down
Loading