Skip to content

Commit

Permalink
Add tests for exotic external tar asset archives, fix some more corne…
Browse files Browse the repository at this point in the history
…r case bugs (dotnet#74412)

* Remove unused _readFirstEntry. Remnant from before we created PaxGlobalExtendedAttributesEntry.

* Set the position of the freshly copied data stream to 0, so the first user access of the DataStream property gives them a stream ready to read from the beginning.

* Process a PAX actual entry's data block only after the extended attributes are analyzed, in case the size is found as an extended attribute and needs to be overriden.

* Add tests to verify the entries of the new external tar assets can be read. Verify their DataStream if available.

* Add copyData argument to recent alignment padding tests.

* Throw an exception sooner and with a clearer message when a data section is unexpected for the entry type.

* Allow trailing nulls and spaces in octal fields.
Co-authored-by: @am11 Adeel Mujahid <3840695+am11@users.noreply.github.com>

* Throw a clearer exception if the unsupported sparse file entry type is encountered. These entries have additional data that indicates the locations of sparse bytes, which cannot be read with just the size field. So to avoid accidentally offseting the reader, we throw.

* Tests.

* Rename to TrimLeadingNullsAndSpaces

Co-authored-by: carlossanlop <carlossanlop@users.noreply.github.com>
  • Loading branch information
carlossanlop and carlossanlop committed Aug 23, 2022
1 parent 149337c commit d370161
Show file tree
Hide file tree
Showing 9 changed files with 705 additions and 65 deletions.
3 changes: 3 additions & 0 deletions src/libraries/System.Formats.Tar/src/Resources/Strings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@
<value>The entry is a symbolic link or a hard link but the LinkName field is null or empty.</value>
</data>
<data name="TarEntryTypeNotSupported" xml:space="preserve">
<value>Entry type '{0}' not supported.</value>
</data>
<data name="TarEntryTypeNotSupportedInFormat" xml:space="preserve">
<value>Entry type '{0}' not supported in format '{1}'.</value>
</data>
<data name="TarEntryTypeNotSupportedForExtracting" xml:space="preserve">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ internal sealed partial class TarHeader
// Attempts to retrieve the next header from the specified tar archive stream.
// Throws if end of stream is reached or if any data type conversion fails.
// Returns a valid TarHeader object if the attributes were read successfully, null otherwise.
internal static TarHeader? TryGetNextHeader(Stream archiveStream, bool copyData, TarEntryFormat initialFormat)
internal static TarHeader? TryGetNextHeader(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, bool processDataBlock)
{
// The four supported formats have a header that fits in the default record size
Span<byte> buffer = stackalloc byte[TarHelpers.RecordSize];

archiveStream.ReadExactly(buffer);

TarHeader? header = TryReadAttributes(initialFormat, buffer);
if (header != null)
if (header != null && processDataBlock)
{
header.ProcessDataBlock(archiveStream, copyData);
}
Expand All @@ -39,7 +39,7 @@ internal sealed partial class TarHeader
// Asynchronously attempts read all the fields of the next header.
// Throws if end of stream is reached or if any data type conversion fails.
// Returns true if all the attributes were read successfully, false otherwise.
internal static async ValueTask<TarHeader?> TryGetNextHeaderAsync(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, CancellationToken cancellationToken)
internal static async ValueTask<TarHeader?> TryGetNextHeaderAsync(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, bool processDataBlock, CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();

Expand All @@ -50,7 +50,7 @@ internal sealed partial class TarHeader
await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false);

TarHeader? header = TryReadAttributes(initialFormat, buffer.Span);
if (header != null)
if (header != null && processDataBlock)
{
await header.ProcessDataBlockAsync(archiveStream, copyData, cancellationToken).ConfigureAwait(false);
}
Expand Down Expand Up @@ -180,7 +180,7 @@ internal void ReplaceNormalAttributesWithExtended(Dictionary<string, string>? di
// will get all the data section read and the stream pointer positioned at the beginning of the next header.
// - Block, Character, Directory, Fifo, HardLink and SymbolicLink typeflag entries have no data section so the archive stream pointer will be positioned at the beginning of the next header.
// - All other typeflag entries with a data section will generate a stream wrapping the data section: SeekableSubReadStream for seekable archive streams, and SubReadStream for unseekable archive streams.
private void ProcessDataBlock(Stream archiveStream, bool copyData)
internal void ProcessDataBlock(Stream archiveStream, bool copyData)
{
bool skipBlockAlignmentPadding = true;

Expand All @@ -199,6 +199,10 @@ private void ProcessDataBlock(Stream archiveStream, bool copyData)
case TarEntryType.HardLink:
case TarEntryType.SymbolicLink:
// No data section
if (_size > 0)
{
throw new FormatException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag));
}
break;
case TarEntryType.RegularFile:
case TarEntryType.V7RegularFile: // Treated as regular file
Expand Down Expand Up @@ -257,6 +261,10 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
case TarEntryType.HardLink:
case TarEntryType.SymbolicLink:
// No data section
if (_size > 0)
{
throw new FormatException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag));
}
break;
case TarEntryType.RegularFile:
case TarEntryType.V7RegularFile: // Treated as regular file
Expand Down Expand Up @@ -311,6 +319,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
{
MemoryStream copiedData = new MemoryStream();
TarHelpers.CopyBytes(archiveStream, copiedData, _size);
// Reset position pointer so the user can do the first DataStream read from the beginning
copiedData.Position = 0;
return copiedData;
}

Expand All @@ -336,6 +346,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
{
MemoryStream copiedData = new MemoryStream();
await TarHelpers.CopyBytesAsync(archiveStream, copiedData, size, cancellationToken).ConfigureAwait(false);
// Reset position pointer so the user can do the first DataStream read from the beginning
copiedData.Position = 0;
return copiedData;
}

Expand Down Expand Up @@ -396,12 +408,13 @@ TarEntryType.LongLink or
TarEntryType.LongPath or
TarEntryType.MultiVolume or
TarEntryType.RenamedOrSymlinked or
TarEntryType.SparseFile or
TarEntryType.TapeVolume => TarEntryFormat.Gnu,

// V7 is the only one that uses 'V7RegularFile'.
TarEntryType.V7RegularFile => TarEntryFormat.V7,

TarEntryType.SparseFile => throw new NotSupportedException(string.Format(SR.TarEntryTypeNotSupported, header._typeFlag)),

// We can quickly determine the *minimum* possible format if the entry type
// is the POSIX 'RegularFile', although later we could upgrade it to PAX or GNU
_ => (header._typeFlag == TarEntryType.RegularFile) ? TarEntryFormat.Ustar : TarEntryFormat.V7
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ internal static TarEntryType GetCorrectTypeFlagForFormat(TarEntryFormat format,
internal static T ParseOctal<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>
{
buffer = TrimEndingNullsAndSpaces(buffer);
buffer = TrimLeadingNullsAndSpaces(buffer);

if (buffer.Length == 0)
{
return T.Zero;
}

T octalFactor = T.CreateTruncating(8u);
T value = T.Zero;
Expand Down Expand Up @@ -243,6 +249,17 @@ internal static ReadOnlySpan<byte> TrimEndingNullsAndSpaces(ReadOnlySpan<byte> b
return buffer.Slice(0, trimmedLength);
}

private static ReadOnlySpan<byte> TrimLeadingNullsAndSpaces(ReadOnlySpan<byte> buffer)
{
int newStart = 0;
while (newStart < buffer.Length && buffer[newStart] is 0 or 32)
{
newStart++;
}

return buffer.Slice(newStart);
}

// Returns the ASCII string contained in the specified buffer of bytes,
// removing the trailing null or space chars.
internal static string GetTrimmedAsciiString(ReadOnlySpan<byte> buffer) => GetTrimmedString(buffer, Encoding.ASCII);
Expand Down Expand Up @@ -351,7 +368,7 @@ TarEntryType.RegularFile or
throw new FormatException(string.Format(SR.TarInvalidFormat, archiveFormat));
}

throw new InvalidOperationException(string.Format(SR.TarEntryTypeNotSupported, entryType, archiveFormat));
throw new InvalidOperationException(string.Format(SR.TarEntryTypeNotSupportedInFormat, entryType, archiveFormat));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ public sealed class TarReader : IDisposable, IAsyncDisposable
private readonly bool _leaveOpen;
private TarEntry? _previouslyReadEntry;
private List<Stream>? _dataStreamsToDispose;
private bool _readFirstEntry;
private bool _reachedEndMarkers;

internal Stream _archiveStream;
Expand All @@ -44,7 +43,6 @@ public TarReader(Stream archiveStream, bool leaveOpen = false)

_previouslyReadEntry = null;
_isDisposed = false;
_readFirstEntry = false;
_reachedEndMarkers = false;
}

Expand Down Expand Up @@ -124,11 +122,6 @@ public async ValueTask DisposeAsync()
TarHeader? header = TryGetNextEntryHeader(copyData);
if (header != null)
{
if (!_readFirstEntry)
{
_readFirstEntry = true;
}

TarEntry entry = header._format switch
{
TarEntryFormat.Pax => header._typeFlag is TarEntryType.GlobalExtendedAttributes ?
Expand Down Expand Up @@ -282,11 +275,6 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
TarHeader? header = await TryGetNextEntryHeaderAsync(copyData, cancellationToken).ConfigureAwait(false);
if (header != null)
{
if (!_readFirstEntry)
{
_readFirstEntry = true;
}

TarEntry entry = header._format switch
{
TarEntryFormat.Pax => header._typeFlag is TarEntryType.GlobalExtendedAttributes ?
Expand Down Expand Up @@ -319,7 +307,7 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
{
Debug.Assert(!_reachedEndMarkers);

TarHeader? header = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Unknown);
TarHeader? header = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Unknown, processDataBlock: true);

if (header == null)
{
Expand Down Expand Up @@ -361,7 +349,7 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel

Debug.Assert(!_reachedEndMarkers);

TarHeader? header = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Unknown, cancellationToken).ConfigureAwait(false);
TarHeader? header = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Unknown, processDataBlock: true, cancellationToken).ConfigureAwait(false);
if (header == null)
{
return null;
Expand Down Expand Up @@ -397,9 +385,10 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
// and returns the actual entry with the processed extended attributes saved in the _extendedAttributes dictionary.
private bool TryProcessExtendedAttributesHeader(TarHeader extendedAttributesHeader, bool copyData, [NotNullWhen(returnValue: true)] out TarHeader? actualHeader)
{
actualHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Pax);
// Don't process the data block of the actual entry just yet, because there's a slim chance
// that the extended attributes contain a size that we need to override in the header
actualHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Pax, processDataBlock: false);

// Now get the actual entry
if (actualHeader == null)
{
return false;
Expand All @@ -417,6 +406,9 @@ TarEntryType.LongLink or
// Replace all the attributes representing standard fields with the extended ones, if any
actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes);

// We retrieved the extended attributes, now we can read the data, and always with the right size
actualHeader.ProcessDataBlock(_archiveStream, copyData);

return true;
}

Expand All @@ -426,8 +418,9 @@ TarEntryType.LongLink or
{
cancellationToken.ThrowIfCancellationRequested();

// Now get the actual entry
TarHeader? actualHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Pax, cancellationToken).ConfigureAwait(false);
// Don't process the data block of the actual entry just yet, because there's a slim chance
// that the extended attributes contain a size that we need to override in the header
TarHeader? actualHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Pax, processDataBlock: false, cancellationToken).ConfigureAwait(false);
if (actualHeader == null)
{
return null;
Expand All @@ -451,6 +444,9 @@ TarEntryType.LongLink or
// Replace all the attributes representing standard fields with the extended ones, if any
actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes);

// We retrieved the extended attributes, now we can read the data, and always with the right size
actualHeader.ProcessDataBlock(_archiveStream, copyData);

return actualHeader;
}

Expand All @@ -460,7 +456,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
{
finalHeader = new(TarEntryFormat.Gnu);

TarHeader? secondHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu);
TarHeader? secondHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true);

// Get the second entry, which is the actual entry
if (secondHeader == null)
Expand All @@ -478,7 +474,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
if ((header._typeFlag is TarEntryType.LongLink && secondHeader._typeFlag is TarEntryType.LongPath) ||
(header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink))
{
TarHeader? thirdHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu);
TarHeader? thirdHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true);

// Get the third entry, which is the actual entry
if (thirdHeader == null)
Expand Down Expand Up @@ -537,7 +533,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
cancellationToken.ThrowIfCancellationRequested();

// Get the second entry, which is the actual entry
TarHeader? secondHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, cancellationToken).ConfigureAwait(false);
TarHeader? secondHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true, cancellationToken).ConfigureAwait(false);
if (secondHeader == null)
{
return null;
Expand All @@ -556,7 +552,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
(header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink))
{
// Get the third entry, which is the actual entry
TarHeader? thirdHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, cancellationToken).ConfigureAwait(false);
TarHeader? thirdHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true, cancellationToken).ConfigureAwait(false);
if (thirdHeader == null)
{
return null;
Expand Down
Loading

0 comments on commit d370161

Please sign in to comment.