From 295613cc08c6f640de604e6fcc63d2d320131108 Mon Sep 17 00:00:00 2001 From: carlossanlop <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 3 Apr 2023 17:56:19 -0600 Subject: [PATCH 01/22] Add tests that verify we handle unseekable streams correctly. Adjust existing unseekable stream tests to verify correct for all formats and with multiple tar entries. --- .../TarReader/TarReader.GetNextEntry.Tests.cs | 33 ++++--- .../TarReader.GetNextEntryAsync.Tests.cs | 77 ++++++++------- .../TarWriter/TarWriter.WriteEntry.Base.cs | 3 - .../TarWriter/TarWriter.WriteEntry.Tests.cs | 96 ++++++++++++++++++- .../TarWriter.WriteEntryAsync.Tests.cs | 96 ++++++++++++++++++- 5 files changed, 245 insertions(+), 60 deletions(-) diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs index 2cede3a350c825..ad87b69e08cef2 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs @@ -161,13 +161,18 @@ public void GetNextEntry_CopyDataTrue_UnseekableArchive() Assert.Throws(() => entry.DataStream.Read(new byte[1])); } - [Fact] - public void GetNextEntry_CopyDataFalse_UnseekableArchive_Exceptions() + [Theory] + [InlineData(TarEntryFormat.V7)] + [InlineData(TarEntryFormat.Ustar)] + [InlineData(TarEntryFormat.Pax)] + [InlineData(TarEntryFormat.Gnu)] + public void GetNextEntry_CopyDataFalse_UnseekableArchive_Exceptions(TarEntryFormat format) { - MemoryStream archive = new MemoryStream(); - using (TarWriter writer = new TarWriter(archive, TarEntryFormat.Ustar, leaveOpen: true)) + TarEntryType fileEntryType = GetTarEntryTypeForTarEntryFormat(TarEntryType.RegularFile, format); + using MemoryStream archive = new MemoryStream(); + using (TarWriter writer = new TarWriter(archive, format, leaveOpen: true)) { - UstarTarEntry entry1 = new UstarTarEntry(TarEntryType.RegularFile, "file.txt"); + TarEntry entry1 = InvokeTarEntryCreationConstructor(format, fileEntryType, "file.txt"); entry1.DataStream = new MemoryStream(); using (StreamWriter streamWriter = new StreamWriter(entry1.DataStream, leaveOpen: true)) { @@ -176,30 +181,34 @@ public void GetNextEntry_CopyDataFalse_UnseekableArchive_Exceptions() entry1.DataStream.Seek(0, SeekOrigin.Begin); // Rewind to ensure it gets written from the beginning writer.WriteEntry(entry1); - UstarTarEntry entry2 = new UstarTarEntry(TarEntryType.Directory, "dir"); + TarEntry entry2 = InvokeTarEntryCreationConstructor(format, TarEntryType.Directory, "dir"); writer.WriteEntry(entry2); } archive.Seek(0, SeekOrigin.Begin); using WrappedStream wrapped = new WrappedStream(archive, canRead: true, canWrite: false, canSeek: false); - UstarTarEntry entry; + TarEntry entry; + byte[] b = new byte[1]; using (TarReader reader = new TarReader(wrapped)) // Unseekable { - entry = reader.GetNextEntry(copyData: false) as UstarTarEntry; + entry = reader.GetNextEntry(copyData: false); Assert.NotNull(entry); - Assert.Equal(TarEntryType.RegularFile, entry.EntryType); + Assert.Equal(fileEntryType, entry.EntryType); entry.DataStream.ReadByte(); // Reading is possible as long as we don't move to the next entry // Attempting to read the next entry should automatically move the position pointer to the beginning of the next header - Assert.NotNull(reader.GetNextEntry()); + TarEntry entry2 = reader.GetNextEntry(); + Assert.NotNull(entry2); + Assert.Equal(format, entry2.Format); + Assert.Equal(TarEntryType.Directory, entry2.EntryType); Assert.Null(reader.GetNextEntry()); // This is not possible because the position of the main stream is already past the data - Assert.Throws(() => entry.DataStream.Read(new byte[1])); + Assert.Throws(() => entry.DataStream.Read(b)); } // The reader must stay alive because it's in charge of disposing all the entries it collected - Assert.Throws(() => entry.DataStream.Read(new byte[1])); + Assert.Throws(() => entry.DataStream.Read(b)); } [Theory] diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs index f99e5853ebeaad..1c266ae6334345 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs @@ -191,49 +191,56 @@ public async Task GetNextEntry_CopyDataTrue_UnseekableArchive_Async() } } - [Fact] - public async Task GetNextEntry_CopyDataFalse_UnseekableArchive_Exceptions_Async() + [Theory] + [InlineData(TarEntryFormat.V7)] + [InlineData(TarEntryFormat.Ustar)] + [InlineData(TarEntryFormat.Pax)] + [InlineData(TarEntryFormat.Gnu)] + public async Task GetNextEntry_CopyDataFalse_UnseekableArchive_Exceptions_Async(TarEntryFormat format) { - await using (MemoryStream archive = new MemoryStream()) + TarEntryType fileEntryType = GetTarEntryTypeForTarEntryFormat(TarEntryType.RegularFile, format); + await using MemoryStream archive = new MemoryStream(); + await using (TarWriter writer = new TarWriter(archive, format, leaveOpen: true)) { - await using (TarWriter writer = new TarWriter(archive, TarEntryFormat.Ustar, leaveOpen: true)) + TarEntry entry1 = InvokeTarEntryCreationConstructor(format, fileEntryType, "file.txt"); + entry1.DataStream = new MemoryStream(); + using (StreamWriter streamWriter = new StreamWriter(entry1.DataStream, leaveOpen: true)) { - UstarTarEntry entry1 = new UstarTarEntry(TarEntryType.RegularFile, "file.txt"); - entry1.DataStream = new MemoryStream(); - using (StreamWriter streamWriter = new StreamWriter(entry1.DataStream, leaveOpen: true)) - { - streamWriter.WriteLine("Hello world!"); - } - entry1.DataStream.Seek(0, SeekOrigin.Begin); // Rewind to ensure it gets written from the beginning - await writer.WriteEntryAsync(entry1); - - UstarTarEntry entry2 = new UstarTarEntry(TarEntryType.Directory, "dir"); - await writer.WriteEntryAsync(entry2); + streamWriter.WriteLine("Hello world!"); } + entry1.DataStream.Seek(0, SeekOrigin.Begin); // Rewind to ensure it gets written from the beginning + await writer.WriteEntryAsync(entry1); - archive.Seek(0, SeekOrigin.Begin); - await using (WrappedStream wrapped = new WrappedStream(archive, canRead: true, canWrite: false, canSeek: false)) - { - UstarTarEntry entry; - await using (TarReader reader = new TarReader(wrapped)) // Unseekable - { - entry = await reader.GetNextEntryAsync(copyData: false) as UstarTarEntry; - Assert.NotNull(entry); - Assert.Equal(TarEntryType.RegularFile, entry.EntryType); - entry.DataStream.ReadByte(); // Reading is possible as long as we don't move to the next entry - - // Attempting to read the next entry should automatically move the position pointer to the beginning of the next header - Assert.NotNull(await reader.GetNextEntryAsync()); - Assert.Null(await reader.GetNextEntryAsync()); + TarEntry entry2 = InvokeTarEntryCreationConstructor(format, TarEntryType.Directory, "dir"); + await writer.WriteEntryAsync(entry2); + } - // This is not possible because the position of the main stream is already past the data - Assert.Throws(() => entry.DataStream.Read(new byte[1])); - } + archive.Seek(0, SeekOrigin.Begin); + await using WrappedStream wrapped = new WrappedStream(archive, canRead: true, canWrite: false, canSeek: false); + TarEntry entry; + byte[] b = new byte[1]; + await using (TarReader reader = new TarReader(wrapped)) // Unseekable + { + entry = await reader.GetNextEntryAsync(copyData: false); + Assert.NotNull(entry); + Assert.Equal(format, entry.Format); + Assert.Equal(fileEntryType, entry.EntryType); + entry.DataStream.ReadByte(); // Reading is possible as long as we don't move to the next entry + + // Attempting to read the next entries should automatically move the position pointer to the beginning of the next header + TarEntry entry2 = await reader.GetNextEntryAsync(); + Assert.NotNull(entry2); + Assert.Equal(format, entry2.Format); + Assert.Equal(TarEntryType.Directory, entry2.EntryType); + Assert.Null(await reader.GetNextEntryAsync()); - // The reader must stay alive because it's in charge of disposing all the entries it collected - Assert.Throws(() => entry.DataStream.Read(new byte[1])); - } + // This is not possible because the position of the main stream is already past the data + await Assert.ThrowsAsync(async () => await entry.DataStream.ReadAsync(b)); } + + // The reader must stay alive because it's in charge of disposing all the entries it collected + await Assert.ThrowsAsync(async () => await entry.DataStream.ReadAsync(b)); + } [Theory] diff --git a/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Base.cs b/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Base.cs index efe7c68ced6bb9..334a27e51fa92b 100644 --- a/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Base.cs +++ b/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Base.cs @@ -1,11 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System; using System.Collections.Generic; using System.IO; -using System.Linq; -using System.Text; using System.Threading.Tasks; using Xunit; diff --git a/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Tests.cs index 94522d54a61734..20a7ba1b180cdc 100644 --- a/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Tests.cs @@ -21,14 +21,18 @@ public void WriteEntry_AfterDispose_Throws() Assert.Throws(() => writer.WriteEntry(entry)); } - [Fact] - public void WriteEntry_FromUnseekableStream_AdvanceDataStream_WriteFromThatPosition() + + [Theory] + [InlineData(TarEntryFormat.V7)] + [InlineData(TarEntryFormat.Ustar)] + [InlineData(TarEntryFormat.Pax)] + [InlineData(TarEntryFormat.Gnu)] + public void WriteEntry_FromUnseekableStream_AdvanceDataStream_WriteFromThatPosition(TarEntryFormat format) { using MemoryStream source = GetTarMemoryStream(CompressionMethod.Uncompressed, TestTarFormat.ustar, "file"); using WrappedStream unseekable = new WrappedStream(source, canRead: true, canWrite: true, canSeek: false); using MemoryStream destination = new MemoryStream(); - using (TarReader reader1 = new TarReader(unseekable)) { TarEntry entry = reader1.GetNextEntry(); @@ -39,6 +43,8 @@ public void WriteEntry_FromUnseekableStream_AdvanceDataStream_WriteFromThatPosit using (TarWriter writer = new TarWriter(destination, TarEntryFormat.Ustar, leaveOpen: true)) { writer.WriteEntry(entry); + TarEntry dirEntry = InvokeTarEntryCreationConstructor(format, TarEntryType.Directory, "dir"); + writer.WriteEntry(dirEntry); // To validate that next entry is not affected } } @@ -54,6 +60,14 @@ public void WriteEntry_FromUnseekableStream_AdvanceDataStream_WriteFromThatPosit string contents = streamReader.ReadLine(); Assert.Equal("ello file", contents); } + + TarEntry dirEntry = reader2.GetNextEntry(); + Assert.NotNull(dirEntry); + Assert.Equal(format, dirEntry.Format); + Assert.Equal(TarEntryType.Directory, dirEntry.EntryType); + Assert.Equal("dir", dirEntry.Name); + + Assert.Null(reader2.GetNextEntry()); } } @@ -502,8 +516,8 @@ public void WriteEntry_FileSizeOverLegacyLimit_Throws(TarEntryFormat entryFormat { const long FileSizeOverLimit = LegacyMaxFileSize + 1; - MemoryStream ms = new(); - Stream s = unseekableStream ? new WrappedStream(ms, ms.CanRead, ms.CanWrite, canSeek: false) : ms; + using MemoryStream ms = new(); + using Stream s = unseekableStream ? new WrappedStream(ms, ms.CanRead, ms.CanWrite, canSeek: false) : ms; using TarWriter writer = new(s); TarEntry writeEntry = InvokeTarEntryCreationConstructor(entryFormat, entryFormat is TarEntryFormat.V7 ? TarEntryType.V7RegularFile : TarEntryType.RegularFile, "foo"); @@ -513,5 +527,77 @@ public void WriteEntry_FileSizeOverLegacyLimit_Throws(TarEntryFormat entryFormat Assert.Throws(() => writer.WriteEntry(writeEntry)); } + + [Theory] + [InlineData(TarEntryFormat.V7)] + [InlineData(TarEntryFormat.Ustar)] + [InlineData(TarEntryFormat.Pax)] + [InlineData(TarEntryFormat.Gnu)] + public void WritingUnseekableDataStream_To_UnseekableArchiveStream_Throws(TarEntryFormat entryFormat) + { + using MemoryStream internalDataStream = new(); + using WrappedStream unseekableDataStream = new(internalDataStream, canRead: true, canWrite: false, canSeek: false); + + using MemoryStream internalArchiveStream = new(); + using WrappedStream unseekableArchiveStream = new(internalArchiveStream, canRead: true, canWrite: true, canSeek: false); + + using TarWriter writer = new(unseekableArchiveStream); + TarEntry entry = InvokeTarEntryCreationConstructor(entryFormat, GetTarEntryTypeForTarEntryFormat(TarEntryType.RegularFile, entryFormat), "file.txt"); + entry.DataStream = unseekableDataStream; + Assert.Throws(() => writer.WriteEntry(entry)); + } + + [Theory] + [InlineData(TarEntryFormat.V7)] + [InlineData(TarEntryFormat.Ustar)] + [InlineData(TarEntryFormat.Pax)] + [InlineData(TarEntryFormat.Gnu)] + public void Write_TwoEntries_With_UnseekableDataStreams(TarEntryFormat entryFormat) + { + using MemoryStream internalDataStream1 = new(); + byte[] expectedBytes1 = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }; + internalDataStream1.Write(expectedBytes1.AsSpan()); + internalDataStream1.Position = 0; + + TarEntryType fileEntryType = GetTarEntryTypeForTarEntryFormat(TarEntryType.RegularFile, entryFormat); + + using WrappedStream unseekableDataStream1 = new(internalDataStream1, canRead: true, canWrite: false, canSeek: false); + TarEntry entry1 = InvokeTarEntryCreationConstructor(entryFormat, fileEntryType, "file1.txt"); + entry1.DataStream = unseekableDataStream1; + + using MemoryStream internalDataStream2 = new(); + byte[] expectedBytes2 = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }; + internalDataStream2.Write(expectedBytes2.AsSpan()); + internalDataStream2.Position = 0; + + using WrappedStream unseekableDataStream2 = new(internalDataStream2, canRead: true, canWrite: false, canSeek: false); + TarEntry entry2 = InvokeTarEntryCreationConstructor(entryFormat, fileEntryType, "file2.txt"); + entry2.DataStream = unseekableDataStream2; + + using MemoryStream archiveStream = new(); + using (TarWriter writer = new(archiveStream, leaveOpen: true)) + { + writer.WriteEntry(entry1); // Should not throw + writer.WriteEntry(entry2); // To verify that second entry is written in correct place + } + + // Verify + archiveStream.Position = 0; + byte[] actualBytes = new byte[] { 0, 0, 0, 0, 0 }; + using (TarReader reader = new(archiveStream)) + { + TarEntry readEntry = reader.GetNextEntry(); + Assert.NotNull(readEntry); + readEntry.DataStream.ReadExactly(actualBytes); + Assert.Equal(expectedBytes1, actualBytes); + + readEntry = reader.GetNextEntry(); + Assert.NotNull(readEntry); + readEntry.DataStream.ReadExactly(actualBytes); + Assert.Equal(expectedBytes2, actualBytes); + + Assert.Null(reader.GetNextEntry()); + } + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntryAsync.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntryAsync.Tests.cs index f0d4d238b2f8e8..1274572f0009f1 100644 --- a/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntryAsync.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntryAsync.Tests.cs @@ -43,8 +43,12 @@ public async Task WriteEntry_AfterDispose_Throws_Async() await Assert.ThrowsAsync(() => writer.WriteEntryAsync(entry)); } - [Fact] - public async Task WriteEntry_FromUnseekableStream_AdvanceDataStream_WriteFromThatPosition_Async() + [Theory] + [InlineData(TarEntryFormat.V7)] + [InlineData(TarEntryFormat.Ustar)] + [InlineData(TarEntryFormat.Pax)] + [InlineData(TarEntryFormat.Gnu)] + public async Task WriteEntry_FromUnseekableStream_AdvanceDataStream_WriteFromThatPosition_Async(TarEntryFormat format) { using MemoryStream source = GetTarMemoryStream(CompressionMethod.Uncompressed, TestTarFormat.ustar, "file"); using WrappedStream unseekable = new WrappedStream(source, canRead: true, canWrite: true, canSeek: false); @@ -57,9 +61,11 @@ public async Task WriteEntry_FromUnseekableStream_AdvanceDataStream_WriteFromTha Assert.NotNull(entry.DataStream); entry.DataStream.ReadByte(); // Advance one byte, now the expected string would be "ello file" - await using (TarWriter writer = new TarWriter(destination, TarEntryFormat.Ustar, leaveOpen: true)) + await using (TarWriter writer = new TarWriter(destination, format, leaveOpen: true)) { await writer.WriteEntryAsync(entry); + TarEntry dirEntry = InvokeTarEntryCreationConstructor(format, TarEntryType.Directory, "dir"); + await writer.WriteEntryAsync(dirEntry); // To validate that next entry is not affected } } @@ -75,6 +81,14 @@ public async Task WriteEntry_FromUnseekableStream_AdvanceDataStream_WriteFromTha string contents = streamReader.ReadLine(); Assert.Equal("ello file", contents); } + + TarEntry dirEntry = await reader2.GetNextEntryAsync(); + Assert.NotNull(dirEntry); + Assert.Equal(format, dirEntry.Format); + Assert.Equal(TarEntryType.Directory, dirEntry.EntryType); + Assert.Equal("dir", dirEntry.Name); + + Assert.Null(await reader2.GetNextEntryAsync()); } } @@ -420,8 +434,8 @@ public async Task WriteEntry_FileSizeOverLegacyLimit_Throws_Async(TarEntryFormat { const long FileSizeOverLimit = LegacyMaxFileSize + 1; - MemoryStream ms = new(); - Stream s = unseekableStream ? new WrappedStream(ms, ms.CanRead, ms.CanWrite, canSeek: false) : ms; + await using MemoryStream ms = new(); + await using Stream s = unseekableStream ? new WrappedStream(ms, ms.CanRead, ms.CanWrite, canSeek: false) : ms; string tarFilePath = GetTestFilePath(); await using TarWriter writer = new(File.Create(tarFilePath)); @@ -432,5 +446,77 @@ public async Task WriteEntry_FileSizeOverLegacyLimit_Throws_Async(TarEntryFormat await Assert.ThrowsAsync(() => writer.WriteEntryAsync(writeEntry)); } + + [Theory] + [InlineData(TarEntryFormat.V7)] + [InlineData(TarEntryFormat.Ustar)] + [InlineData(TarEntryFormat.Pax)] + [InlineData(TarEntryFormat.Gnu)] + public async Task WritingUnseekableDataStream_To_UnseekableArchiveStream_Throws_Async(TarEntryFormat entryFormat) + { + await using MemoryStream internalDataStream = new(); + await using WrappedStream unseekableDataStream = new(internalDataStream, canRead: true, canWrite: false, canSeek: false); + + await using MemoryStream internalArchiveStream = new(); + await using WrappedStream unseekableArchiveStream = new(internalArchiveStream, canRead: true, canWrite: true, canSeek: false); + + await using TarWriter writer = new(unseekableArchiveStream); + TarEntry entry = InvokeTarEntryCreationConstructor(entryFormat, GetTarEntryTypeForTarEntryFormat(TarEntryType.RegularFile, entryFormat), "file.txt"); + entry.DataStream = unseekableDataStream; + await Assert.ThrowsAsync(() => writer.WriteEntryAsync(entry)); + } + + [Theory] + [InlineData(TarEntryFormat.V7)] + [InlineData(TarEntryFormat.Ustar)] + [InlineData(TarEntryFormat.Pax)] + [InlineData(TarEntryFormat.Gnu)] + public async Task Write_TwoEntries_With_UnseekableDataStreams_Async(TarEntryFormat entryFormat) + { + await using MemoryStream internalDataStream1 = new(); + byte[] expectedBytes1 = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }; + await internalDataStream1.WriteAsync(expectedBytes1.AsMemory()); + internalDataStream1.Position = 0; + + TarEntryType fileEntryType = GetTarEntryTypeForTarEntryFormat(TarEntryType.RegularFile, entryFormat); + + await using WrappedStream unseekableDataStream1 = new(internalDataStream1, canRead: true, canWrite: false, canSeek: false); + TarEntry entry1 = InvokeTarEntryCreationConstructor(entryFormat, fileEntryType, "file1.txt"); + entry1.DataStream = unseekableDataStream1; + + await using MemoryStream internalDataStream2 = new(); + byte[] expectedBytes2 = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }; + await internalDataStream2.WriteAsync(expectedBytes2.AsMemory()); + internalDataStream2.Position = 0; + + await using WrappedStream unseekableDataStream2 = new(internalDataStream2, canRead: true, canWrite: false, canSeek: false); + TarEntry entry2 = InvokeTarEntryCreationConstructor(entryFormat, fileEntryType, "file2.txt"); + entry2.DataStream = unseekableDataStream2; + + await using MemoryStream archiveStream = new(); + await using (TarWriter writer = new(archiveStream, leaveOpen: true)) + { + await writer.WriteEntryAsync(entry1); // Should not throw + await writer.WriteEntryAsync(entry2); // To verify that second entry is written in correct place + } + + // Verify + archiveStream.Position = 0; + byte[] actualBytes = new byte[] { 0, 0, 0, 0, 0 }; + await using (TarReader reader = new(archiveStream)) + { + TarEntry readEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(readEntry); + await readEntry.DataStream.ReadExactlyAsync(actualBytes); + Assert.Equal(expectedBytes1, actualBytes); + + readEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(readEntry); + await readEntry.DataStream.ReadExactlyAsync(actualBytes); + Assert.Equal(expectedBytes2, actualBytes); + + Assert.Null(await reader.GetNextEntryAsync()); + } + } } } From d627497fd92ebdb054d8859d4330a0579c05e3fb Mon Sep 17 00:00:00 2001 From: carlossanlop <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 3 Apr 2023 17:57:52 -0600 Subject: [PATCH 02/22] Add expected data field locations for all supported formats. --- .../src/System/Formats/Tar/FieldLocations.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/FieldLocations.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/FieldLocations.cs index 90856330aa2ef8..adc9819b2cab0f 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/FieldLocations.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/FieldLocations.cs @@ -49,5 +49,9 @@ internal static class FieldLocations internal const ushort V7Padding = LinkName + FieldLengths.LinkName; internal const ushort PosixPadding = Prefix + FieldLengths.Prefix; internal const ushort GnuPadding = RealSize + FieldLengths.RealSize; + + internal const ushort V7Data = V7Padding + FieldLengths.V7Padding; + internal const ushort PosixData = PosixPadding + FieldLengths.PosixPadding; + internal const ushort GnuData = GnuPadding + FieldLengths.GnuPadding; } } From 018f709561c9bba5db5cd468f7072fc84f43ec92 Mon Sep 17 00:00:00 2001 From: carlossanlop <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 3 Apr 2023 17:58:50 -0600 Subject: [PATCH 03/22] Add exception message for when attempting to write an unseekable data stream into an unseekable archive stream. --- .../src/Resources/Strings.resx | 59 ++++++++++--------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index 0ece0c6c561528..b86d4b287384a1 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -1,17 +1,17 @@  - @@ -270,4 +270,7 @@ The value of the extended attribute key '{0}' contains a disallowed '{1}' character. - \ No newline at end of file + + Cannot write the unseekable data stream of entry '{0}' into an unseekable archive stream. + + From 9092cb4a4b850f29a6f09fe4e5abbb51256bf19d Mon Sep 17 00:00:00 2001 From: carlossanlop <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 3 Apr 2023 17:59:31 -0600 Subject: [PATCH 04/22] Add seekability validation in public TarWriter entry writing methods. --- .../src/System/Formats/Tar/TarWriter.cs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs index c4ec17272a8b85..df77341be9a039 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs @@ -222,6 +222,7 @@ public void WriteEntry(TarEntry entry) ObjectDisposedException.ThrowIf(_isDisposed, this); ArgumentNullException.ThrowIfNull(entry); ValidateEntryLinkName(entry._header._typeFlag, entry._header._linkName); + ValidateStreamsSeekability(entry); WriteEntryInternal(entry); } @@ -270,6 +271,7 @@ public Task WriteEntryAsync(TarEntry entry, CancellationToken cancellationToken ObjectDisposedException.ThrowIf(_isDisposed, this); ArgumentNullException.ThrowIfNull(entry); ValidateEntryLinkName(entry._header._typeFlag, entry._header._linkName); + ValidateStreamsSeekability(entry); return WriteEntryAsyncInternal(entry, cancellationToken); } @@ -374,6 +376,14 @@ private async ValueTask WriteFinalRecordsAsync() return (fullPath, actualEntryName); } + private void ValidateStreamsSeekability(TarEntry entry) + { + if (!_archiveStream.CanSeek && entry._header._dataStream != null && !entry._header._dataStream.CanSeek) + { + throw new IOException(SR.Format(SR.TarStreamSeekabilityUnsupportedCombination, entry.Name)); + } + } + private static void ValidateEntryLinkName(TarEntryType entryType, string? linkName) { if (entryType is TarEntryType.HardLink or TarEntryType.SymbolicLink) From 6c460ebd67dd0d4bcf961ccec6b6dadae99f7aab Mon Sep 17 00:00:00 2001 From: carlossanlop <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 3 Apr 2023 19:35:23 -0600 Subject: [PATCH 05/22] Add TarFile stream roundtrip tests for unseekable streams. --- ...TarFile.ExtractToDirectory.Stream.Tests.cs | 62 ++++++++++++++++++- ...le.ExtractToDirectoryAsync.Stream.Tests.cs | 62 ++++++++++++++++++- .../System.Formats.Tar/tests/TarTestsBase.cs | 8 +++ 3 files changed, 130 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs index ab90f0043bc7d9..6120820e1992d5 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs @@ -1,9 +1,9 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Collections.Generic; using System.IO; using System.IO.Compression; +using System.IO.Enumeration; using System.Linq; using Xunit; @@ -204,5 +204,65 @@ public void PaxNameCollision_DedupInExtendedAttributes() Assert.True(File.Exists(path1)); Assert.True(Path.Exists(path2)); } + + [Theory] + [MemberData(nameof(GetTestTarFormats))] + public void UnseekableStreams_RoundTrip(TestTarFormat testFormat) + { + using TempDirectory root = new(); + + using MemoryStream sourceStream = GetTarMemoryStream(CompressionMethod.Uncompressed, testFormat, "many_small_files"); + using WrappedStream sourceUnseekableArchiveStream = new(sourceStream, canRead: true, canWrite: false, canSeek: false); + + TarFile.ExtractToDirectory(sourceUnseekableArchiveStream, root.Path, overwriteFiles: false); + + using MemoryStream destinationStream = new(); + using WrappedStream destinationUnseekableArchiveStream = new(destinationStream, canRead: true, canWrite: true, canSeek: false); + TarFile.CreateFromDirectory(root.Path, destinationUnseekableArchiveStream, includeBaseDirectory: false); + + FileSystemEnumerable fileSystemEntries = new FileSystemEnumerable( + directory: root.Path, + transform: (ref FileSystemEntry entry) => entry.ToFileSystemInfo(), + options: new EnumerationOptions() { RecurseSubdirectories = true }); + + destinationStream.Position = 0; + using TarReader reader = new TarReader(destinationStream, leaveOpen: false); + + // Size of files in many_small_files.tar are expected to be tiny and all equal + int bufferLength = 1024; + byte[] buffer1 = new byte[bufferLength]; + byte[] buffer2 = new byte[bufferLength]; + TarEntry entry = reader.GetNextEntry(); + do + { + Assert.NotNull(entry); + string entryPath = Path.TrimEndingDirectorySeparator(Path.GetFullPath(Path.Join(root.Path, entry.Name))); + FileSystemInfo fsi = fileSystemEntries.SingleOrDefault(file => + file.FullName == entryPath); + Assert.NotNull(fsi); + if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile) + { + Assert.NotNull(entry.DataStream); + + using Stream fileData = File.OpenRead(fsi.FullName); + + // If the size of the files in manu_small_files.tar ever gets larger than bufferLength, + // these asserts should fail and the test will need to be updated + AssertExtensions.LessThanOrEqualTo(entry.Length, bufferLength); + AssertExtensions.LessThanOrEqualTo(fileData.Length, bufferLength); + + Assert.Equal(fileData.Length, entry.Length); + + Array.Clear(buffer1); + Array.Clear(buffer2); + + fileData.ReadExactly(buffer1, 0, (int)entry.Length); + entry.DataStream.ReadExactly(buffer2, 0, (int)entry.Length); + + AssertExtensions.SequenceEqual(buffer1, buffer2); + } + } + while ((entry = reader.GetNextEntry()) != null); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs index d7502d940e94e6..f50ead6ff11ac8 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs @@ -1,9 +1,9 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Collections.Generic; using System.IO; using System.IO.Compression; +using System.IO.Enumeration; using System.Linq; using System.Threading; using System.Threading.Tasks; @@ -269,5 +269,65 @@ public async Task PaxNameCollision_DedupInExtendedAttributesAsync() Assert.True(File.Exists(path1)); Assert.True(Path.Exists(path2)); } + + [Theory] + [MemberData(nameof(GetTestTarFormats))] + public async Task UnseekableStreams_RoundTrip_Async(TestTarFormat testFormat) + { + using TempDirectory root = new(); + + await using MemoryStream sourceStream = GetTarMemoryStream(CompressionMethod.Uncompressed, testFormat, "many_small_files"); + await using WrappedStream sourceUnseekableArchiveStream = new(sourceStream, canRead: true, canWrite: false, canSeek: false); + + await TarFile.ExtractToDirectoryAsync(sourceUnseekableArchiveStream, root.Path, overwriteFiles: false); + + await using MemoryStream destinationStream = new(); + await using WrappedStream destinationUnseekableArchiveStream = new(destinationStream, canRead: true, canWrite: true, canSeek: false); + await TarFile.CreateFromDirectoryAsync(root.Path, destinationUnseekableArchiveStream, includeBaseDirectory: false); + + FileSystemEnumerable fileSystemEntries = new FileSystemEnumerable( + directory: root.Path, + transform: (ref FileSystemEntry entry) => entry.ToFileSystemInfo(), + options: new EnumerationOptions() { RecurseSubdirectories = true }); + + destinationStream.Position = 0; + await using TarReader reader = new TarReader(destinationStream, leaveOpen: false); + + // Size of files in many_small_files.tar are expected to be tiny and all equal + int bufferLength = 1024; + byte[] buffer1 = new byte[bufferLength]; + byte[] buffer2 = new byte[bufferLength]; + TarEntry entry = await reader.GetNextEntryAsync(); + do + { + Assert.NotNull(entry); + string entryPath = Path.TrimEndingDirectorySeparator(Path.GetFullPath(Path.Join(root.Path, entry.Name))); + FileSystemInfo fsi = fileSystemEntries.SingleOrDefault(file => + file.FullName == entryPath); + Assert.NotNull(fsi); + if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile) + { + Assert.NotNull(entry.DataStream); + + await using Stream fileData = File.OpenRead(fsi.FullName); + + // If the size of the files in manu_small_files.tar ever gets larger than bufferLength, + // these asserts should fail and the test will need to be updated + AssertExtensions.LessThanOrEqualTo(entry.Length, bufferLength); + AssertExtensions.LessThanOrEqualTo(fileData.Length, bufferLength); + + Assert.Equal(fileData.Length, entry.Length); + + Array.Clear(buffer1); + Array.Clear(buffer2); + + await fileData.ReadExactlyAsync(buffer1, 0, (int)entry.Length); + await entry.DataStream.ReadExactlyAsync(buffer2, 0, (int)entry.Length); + + AssertExtensions.SequenceEqual(buffer1, buffer2); + } + } + while ((entry = await reader.GetNextEntryAsync()) != null); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs index 3363d279463958..e62b5d045d5521 100644 --- a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs +++ b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs @@ -489,6 +489,14 @@ protected static TarEntry InvokeTarEntryCreationConstructor(TarEntryFormat targe _ => throw new InvalidDataException($"Unexpected format: {targetFormat}") }; + public static IEnumerable GetTestTarFormats() + { + foreach (TestTarFormat testFormat in Enum.GetValues()) + { + yield return new object[] { testFormat }; + } + } + public static IEnumerable GetFormatsAndLinks() { foreach (TarEntryFormat format in new[] { TarEntryFormat.V7, TarEntryFormat.Ustar, TarEntryFormat.Pax, TarEntryFormat.Gnu }) From 6ea8e0fa017f6b9640a5dfb96da3e837e1d263ce Mon Sep 17 00:00:00 2001 From: carlossanlop <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 3 Apr 2023 19:40:52 -0600 Subject: [PATCH 06/22] Add missing async TarFile roundtrip tests. --- .../System.Formats.Tar/tests/System.Formats.Tar.Tests.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Formats.Tar/tests/System.Formats.Tar.Tests.csproj b/src/libraries/System.Formats.Tar/tests/System.Formats.Tar.Tests.csproj index ac75d0d7416db3..cffabe715ed098 100644 --- a/src/libraries/System.Formats.Tar/tests/System.Formats.Tar.Tests.csproj +++ b/src/libraries/System.Formats.Tar/tests/System.Formats.Tar.Tests.csproj @@ -18,6 +18,7 @@ + From f87b9e946e3853af265c1c024cb6b0e6d73692ff Mon Sep 17 00:00:00 2001 From: carlossanlop <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 3 Apr 2023 19:41:13 -0600 Subject: [PATCH 07/22] Support unseekable streams in TarHeader.Write. --- .../src/System/Formats/Tar/TarHeader.Write.cs | 483 ++++++++++++++++-- 1 file changed, 433 insertions(+), 50 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 41be887ed38912..068ef3f37b5230 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -28,35 +28,125 @@ internal sealed partial class TarHeader // Writes the current header as a V7 entry into the archive stream. internal void WriteAsV7(Stream archiveStream, Span buffer) { - WriteV7FieldsToBuffer(buffer); + Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - archiveStream.Write(buffer); + if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + { + WriteV7FieldsToBuffer(GetTotalDataBytesToWrite(), buffer); + + archiveStream.Write(buffer); - if (_dataStream != null) + if (_dataStream != null) + { + WriteData(archiveStream, _dataStream, _size); + } + } + else // seekable archive stream, unseekable data stream { - WriteData(archiveStream, _dataStream, _size); + WriteAsV7WithUnseekableDataStream(archiveStream, buffer); } } + // Writes the entry in the order required to be able to obtain the unseekable data stream size. + private void WriteAsV7WithUnseekableDataStream(Stream archiveStream, Span buffer) + { + // When the data stream is unseekable, the order in which we write the entry data changes + Debug.Assert(archiveStream.CanSeek); + Debug.Assert(_dataStream != null); + + // Store the start of the current entry's header, it'll be used later + long headerStartPosition = archiveStream.Position; + + // We know the exact location where the data starts depending on the format + long dataStartPosition = headerStartPosition + FieldLocations.V7Data; + + // Move to the data start location and write the data + archiveStream.Seek(FieldLocations.V7Data, SeekOrigin.Current); + _dataStream.CopyTo(archiveStream); // The data gets copied from the current position + + // Get the new archive stream position, and the difference is the size of the data stream + long dataEndPosition = archiveStream.Position; + long actualLength = dataEndPosition - dataStartPosition; + + // Write the padding now so that we can go back to writing the entry's header metadata + WritePadding(archiveStream, actualLength); + + // Store the end of the current header, we will write the next one after this position + long endOfHeaderPosition = archiveStream.Position; + + // Go back to the start of the entry header to write the rest of the fields + archiveStream.Position = headerStartPosition; + WriteV7FieldsToBuffer(actualLength, buffer); + archiveStream.Write(buffer); + + // Finally, move to the end of the header to continue with the next entry + archiveStream.Position = endOfHeaderPosition; + } + // Asynchronously writes the current header as a V7 entry into the archive stream and returns the value of the final checksum. internal async Task WriteAsV7Async(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - WriteV7FieldsToBuffer(buffer.Span); + Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + { + WriteV7FieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); + + await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); - if (_dataStream != null) + if (_dataStream != null) + { + await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); + } + } + else // seekable archive stream, unseekable data stream { - await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); + await WriteAsV7WithUnseekableDataStreamAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); } } + // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. + private async Task WriteAsV7WithUnseekableDataStreamAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + { + // When the data stream is unseekable, the order in which we write the entry data changes + Debug.Assert(archiveStream.CanSeek); + Debug.Assert(_dataStream != null); + + // Store the start of the current entry's header, it'll be used later + long headerStartPosition = archiveStream.Position; + + // We know the exact location where the data starts depending on the format + long dataStartPosition = headerStartPosition + FieldLocations.V7Data; + + // Move to the data start location and write the data + archiveStream.Seek(FieldLocations.V7Data, SeekOrigin.Current); + await _dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position + + // Get the new archive stream position, and the difference is the size of the data stream + long dataEndPosition = archiveStream.Position; + long actualLength = dataEndPosition - dataStartPosition; + + // Write the padding now so that we can go back to writing the entry's header metadata + await WritePaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); + + // Store the end of the current header, we will write the next one after this position + long endOfHeaderPosition = archiveStream.Position; + + // Go back to the start of the entry header to write the rest of the fields + archiveStream.Position = headerStartPosition; + WriteV7FieldsToBuffer(actualLength, buffer.Span); + await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + + // Finally, move to the end of the header to continue with the next entry + archiveStream.Position = endOfHeaderPosition; + } + // Writes the V7 header fields to the specified buffer, calculates and writes the checksum, then returns the final data length. - private void WriteV7FieldsToBuffer(Span buffer) + private void WriteV7FieldsToBuffer(long size, Span buffer) { - _size = GetTotalDataBytesToWrite(); + _size = size; TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag); int tmpChecksum = WriteName(buffer); @@ -67,41 +157,131 @@ private void WriteV7FieldsToBuffer(Span buffer) // Writes the current header as a Ustar entry into the archive stream. internal void WriteAsUstar(Stream archiveStream, Span buffer) { - WriteUstarFieldsToBuffer(buffer); + Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - archiveStream.Write(buffer); + if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + { + WriteUstarFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); + + archiveStream.Write(buffer); - if (_dataStream != null) + if (_dataStream != null) + { + WriteData(archiveStream, _dataStream, _size); + } + } + else // seekable archive stream, unseekable data stream { - WriteData(archiveStream, _dataStream, _size); + WriteAsUstarWithUnseekableDataStream(archiveStream, buffer); } } + // Writes the entry in the order required to be able to obtain the unseekable data stream size. + private void WriteAsUstarWithUnseekableDataStream(Stream archiveStream, Span buffer) + { + // When the data stream is unseekable, the order in which we write the entry data changes + Debug.Assert(archiveStream.CanSeek); + Debug.Assert(_dataStream != null); + + // Store the start of the current entry's header, it'll be used later + long headerStartPosition = archiveStream.Position; + + // We know the exact location where the data starts depending on the format + long dataStartPosition = headerStartPosition + FieldLocations.PosixData; + + // Move to the data start location and write the data + archiveStream.Seek(FieldLocations.PosixData, SeekOrigin.Current); + _dataStream.CopyTo(archiveStream); // The data gets copied from the current position + + // Get the new archive stream position, and the difference is the size of the data stream + long dataEndPosition = archiveStream.Position; + long actualLength = dataEndPosition - dataStartPosition; + + // Write the padding now so that we can go back to writing the entry's header metadata + WritePadding(archiveStream, actualLength); + + // Store the end of the current header, we will write the next one after this position + long endOfHeaderPosition = archiveStream.Position; + + // Go back to the start of the entry header to write the rest of the fields + archiveStream.Position = headerStartPosition; + WriteUstarFieldsToBuffer(actualLength, buffer); + archiveStream.Write(buffer); + + // Finally, move to the end of the header to continue with the next entry + archiveStream.Position = endOfHeaderPosition; + } + // Asynchronously rites the current header as a Ustar entry into the archive stream and returns the value of the final checksum. internal async Task WriteAsUstarAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); + Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - WriteUstarFieldsToBuffer(buffer.Span); + if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + { + WriteUstarFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); - if (_dataStream != null) + if (_dataStream != null) + { + await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); + } + } + else // seekable archive stream, unseekable data stream { - await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); + await WriteAsUstarWithUnseekableDataStreamAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); } } + // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. + private async Task WriteAsUstarWithUnseekableDataStreamAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + { + // When the data stream is unseekable, the order in which we write the entry data changes + Debug.Assert(archiveStream.CanSeek); + Debug.Assert(_dataStream != null); + + // Store the start of the current entry's header, it'll be used later + long headerStartPosition = archiveStream.Position; + + // We know the exact location where the data starts depending on the format + long dataStartPosition = headerStartPosition + FieldLocations.PosixData; + + // Move to the data start location and write the data + archiveStream.Seek(FieldLocations.PosixData, SeekOrigin.Current); + await _dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position + + // Get the new archive stream position, and the difference is the size of the data stream + long dataEndPosition = archiveStream.Position; + long actualLength = dataEndPosition - dataStartPosition; + + // Write the padding now so that we can go back to writing the entry's header metadata + await WritePaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); + + // Store the end of the current header, we will write the next one after this position + long endOfHeaderPosition = archiveStream.Position; + + // Go back to the start of the entry header to write the rest of the fields + archiveStream.Position = headerStartPosition; + WriteUstarFieldsToBuffer(actualLength, buffer.Span); + await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + + // Finally, move to the end of the header to continue with the next entry + archiveStream.Position = endOfHeaderPosition; + } + // Writes the Ustar header fields to the specified buffer, calculates and writes the checksum, then returns the final data length. - private void WriteUstarFieldsToBuffer(Span buffer) + private void WriteUstarFieldsToBuffer(long size, Span buffer) { - _size = GetTotalDataBytesToWrite(); + _size = size; TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Ustar, _typeFlag); int tmpChecksum = WriteUstarName(buffer); tmpChecksum += WriteCommonFields(buffer, actualEntryType); tmpChecksum += WritePosixMagicAndVersion(buffer); tmpChecksum += WritePosixAndGnuSharedFields(buffer); + _checksum = WriteChecksum(tmpChecksum, buffer); } @@ -140,7 +320,6 @@ internal void WriteAsPax(Stream archiveStream, Span buffer) // First, we write the preceding extended attributes header TarHeader extendedAttributesHeader = new(TarEntryFormat.Pax); // Fill the current header's dict - _size = GetTotalDataBytesToWrite(); CollectExtendedAttributesFromStandardFieldsIfNeeded(); // And pass the attributes to the preceding extended attributes header for writing extendedAttributesHeader.WriteAsPaxExtendedAttributes(archiveStream, buffer, ExtendedAttributes, isGea: false, globalExtendedAttributesEntryNumber: -1); @@ -159,7 +338,6 @@ internal async Task WriteAsPaxAsync(Stream archiveStream, Memory buffer, C // First, we write the preceding extended attributes header TarHeader extendedAttributesHeader = new(TarEntryFormat.Pax); // Fill the current header's dict - _size = GetTotalDataBytesToWrite(); CollectExtendedAttributesFromStandardFieldsIfNeeded(); // And pass the attributes to the preceding extended attributes header for writing await extendedAttributesHeader.WriteAsPaxExtendedAttributesAsync(archiveStream, buffer, ExtendedAttributes, isGea: false, globalExtendedAttributesEntryNumber: -1, cancellationToken).ConfigureAwait(false); @@ -240,35 +418,125 @@ private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string // Writes the current header as a GNU entry into the archive stream. internal void WriteAsGnuInternal(Stream archiveStream, Span buffer) { - WriteAsGnuSharedInternal(buffer); + Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - archiveStream.Write(buffer); + if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + { + WriteGnuFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); - if (_dataStream != null) + archiveStream.Write(buffer); + + if (_dataStream != null) + { + WriteData(archiveStream, _dataStream, _size); + } + } + else // seekable archive stream, unseekable data stream { - WriteData(archiveStream, _dataStream, _size); + WriteAsGnuWithUnseekableDataStream(archiveStream, buffer); } } + // Writes the entry in the order required to be able to obtain the unseekable data stream size. + private void WriteAsGnuWithUnseekableDataStream(Stream archiveStream, Span buffer) + { + // When the data stream is unseekable, the order in which we write the entry data changes + Debug.Assert(archiveStream.CanSeek); + Debug.Assert(_dataStream != null); + + // Store the start of the current entry's header, it'll be used later + long headerStartPosition = archiveStream.Position; + + // We know the exact location where the data starts depending on the format + long dataStartPosition = headerStartPosition + FieldLocations.GnuData; + + // Move to the data start location and write the data + archiveStream.Seek(FieldLocations.GnuData, SeekOrigin.Current); + _dataStream.CopyTo(archiveStream); // The data gets copied from the current position + + // Get the new archive stream position, and the difference is the size of the data stream + long dataEndPosition = archiveStream.Position; + long actualLength = dataEndPosition - dataStartPosition; + + // Write the padding now so that we can go back to writing the entry's header metadata + WritePadding(archiveStream, actualLength); + + // Store the end of the current header, we will write the next one after this position + long endOfHeaderPosition = archiveStream.Position; + + // Go back to the start of the entry header to write the rest of the fields + archiveStream.Position = headerStartPosition; + WriteGnuFieldsToBuffer(actualLength, buffer); + archiveStream.Write(buffer); + + // Finally, move to the end of the header to continue with the next entry + archiveStream.Position = endOfHeaderPosition; + } + // Asynchronously writes the current header as a GNU entry into the archive stream. internal async Task WriteAsGnuInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); + Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - WriteAsGnuSharedInternal(buffer.Span); + if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + { - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + WriteGnuFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); - if (_dataStream != null) + await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + + if (_dataStream != null) + { + await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); + } + } + else // seekable archive stream, unseekable data stream { - await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); + await WriteAsGnuWithUnseekableDataStreamAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); } } + // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. + private async Task WriteAsGnuWithUnseekableDataStreamAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + { + // When the data stream is unseekable, the order in which we write the entry data changes + Debug.Assert(archiveStream.CanSeek); + Debug.Assert(_dataStream != null); + + // Store the start of the current entry's header, it'll be used later + long headerStartPosition = archiveStream.Position; + + // We know the exact location where the data starts depending on the format + long dataStartPosition = headerStartPosition + FieldLocations.GnuData; + + // Move to the data start location and write the data + archiveStream.Seek(FieldLocations.GnuData, SeekOrigin.Current); + await _dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position + + // Get the new archive stream position, and the difference is the size of the data stream + long dataEndPosition = archiveStream.Position; + long actualLength = dataEndPosition - dataStartPosition; + + // Write the padding now so that we can go back to writing the entry's header metadata + await WritePaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); + + // Store the end of the current header, we will write the next one after this position + long endOfHeaderPosition = archiveStream.Position; + + // Go back to the start of the entry header to write the rest of the fields + archiveStream.Position = headerStartPosition; + WriteGnuFieldsToBuffer(actualLength, buffer.Span); + await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + + // Finally, move to the end of the header to continue with the next entry + archiveStream.Position = endOfHeaderPosition; + } + // Shared checksum and data length calculations for GNU entry writing. - private void WriteAsGnuSharedInternal(Span buffer) + private void WriteGnuFieldsToBuffer(long size, Span buffer) { - _size = GetTotalDataBytesToWrite(); + _size = size; int tmpChecksum = WriteName(buffer); tmpChecksum += WriteCommonFields(buffer, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Gnu, _typeFlag)); @@ -305,7 +573,7 @@ private void WriteAsPaxExtendedAttributesShared(bool isGea, int globalExtendedAt GenerateExtendedAttributeName(); _mode = TarHelpers.GetDefaultMode(_typeFlag); - _size = GetTotalDataBytesToWrite(); + //_size = GetTotalDataBytesToWrite(); _typeFlag = isGea ? TarEntryType.GlobalExtendedAttributes : TarEntryType.ExtendedAttributes; } @@ -313,35 +581,126 @@ private void WriteAsPaxExtendedAttributesShared(bool isGea, int globalExtendedAt // This method writes an entry as both entries require, using the data from the current header instance. private void WriteAsPaxInternal(Stream archiveStream, Span buffer) { - WriteAsPaxSharedInternal(buffer); + Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - archiveStream.Write(buffer); + if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + { + WritePaxFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); - if (_dataStream != null) + archiveStream.Write(buffer); + + if (_dataStream != null) + { + WriteData(archiveStream, _dataStream, _size); + } + } + else // seekable archive stream, unseekable data stream { - WriteData(archiveStream, _dataStream, _size); + WriteAsPaxWithUnseekableDataStream(archiveStream, buffer); } } + // Writes the entry in the order required to be able to obtain the unseekable data stream size. + private void WriteAsPaxWithUnseekableDataStream(Stream archiveStream, Span buffer) + { + // When the data stream is unseekable, the order in which we write the entry data changes + Debug.Assert(archiveStream.CanSeek); + Debug.Assert(_dataStream != null); + + // Store the start of the current entry's header, it'll be used later + long headerStartPosition = archiveStream.Position; + + // We know the exact location where the data starts depending on the format + long dataStartPosition = headerStartPosition + FieldLocations.PosixData; + + // Move to the data start location and write the data + archiveStream.Seek(FieldLocations.PosixData, SeekOrigin.Current); + _dataStream.CopyTo(archiveStream); // The data gets copied from the current position + + // Get the new archive stream position, and the difference is the size of the data stream + long dataEndPosition = archiveStream.Position; + long actualLength = dataEndPosition - dataStartPosition; + + // Write the padding now so that we can go back to writing the entry's header metadata + WritePadding(archiveStream, actualLength); + + // Store the end of the current header, we will write the next one after this position + long endOfHeaderPosition = archiveStream.Position; + + // Go back to the start of the entry header to write the rest of the fields + archiveStream.Position = headerStartPosition; + WritePaxFieldsToBuffer(actualLength, buffer); + archiveStream.Write(buffer); + + // Finally, move to the end of the header to continue with the next entry + archiveStream.Position = endOfHeaderPosition; + } + // Both the Extended Attributes and Global Extended Attributes entry headers are written in a similar way, just the data changes // This method asynchronously writes an entry as both entries require, using the data from the current header instance. private async Task WriteAsPaxInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - WriteAsPaxSharedInternal(buffer.Span); + Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + { + WritePaxFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); + + await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); - if (_dataStream != null) + if (_dataStream != null) + { + await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); + } + } + else // seekable archive stream, unseekable data stream { - await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); + await WriteAsPaxWithUnseekableDataStreamAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); } } + // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. + private async Task WriteAsPaxWithUnseekableDataStreamAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + { + // When the data stream is unseekable, the order in which we write the entry data changes + Debug.Assert(archiveStream.CanSeek); + Debug.Assert(_dataStream != null); + + // Store the start of the current entry's header, it'll be used later + long headerStartPosition = archiveStream.Position; + + // We know the exact location where the data starts depending on the format + long dataStartPosition = headerStartPosition + FieldLocations.PosixData; + + // Move to the data start location and write the data + archiveStream.Seek(FieldLocations.PosixData, SeekOrigin.Current); + await _dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position + + // Get the new archive stream position, and the difference is the size of the data stream + long dataEndPosition = archiveStream.Position; + long actualLength = dataEndPosition - dataStartPosition; + + // Write the padding now so that we can go back to writing the entry's header metadata + await WritePaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); + + // Store the end of the current header, we will write the next one after this position + long endOfHeaderPosition = archiveStream.Position; + + // Go back to the start of the entry header to write the rest of the fields + archiveStream.Position = headerStartPosition; + WritePaxFieldsToBuffer(actualLength, buffer.Span); + await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + + // Finally, move to the end of the header to continue with the next entry + archiveStream.Position = endOfHeaderPosition; + } + // Shared checksum and data length calculations for PAX entry writing. - private void WriteAsPaxSharedInternal(Span buffer) + private void WritePaxFieldsToBuffer(long size, Span buffer) { + _size = size; int tmpChecksum = WriteName(buffer); tmpChecksum += WriteCommonFields(buffer, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Pax, _typeFlag)); tmpChecksum += WritePosixMagicAndVersion(buffer); @@ -507,18 +866,19 @@ private int WriteCommonFields(Span buffer, TarEntryType actualEntryType) } // Calculates how many data bytes should be written, depending on the position pointer of the stream. + // Only works if the stream is seekable. private long GetTotalDataBytesToWrite() { - if (_dataStream != null) + if (_dataStream == null) { - long length = _dataStream.Length; - long position = _dataStream.Position; - if (position < length) - { - return length - position; - } + return 0; } - return 0; + Debug.Assert(_dataStream.CanSeek); + + long length = _dataStream.Length; + long position = _dataStream.Position; + + return position < length ? length - position : 0; } // Writes the magic and version fields of a ustar or pax entry into the specified spans. @@ -609,7 +969,12 @@ private int WriteGnuFields(Span buffer) private static void WriteData(Stream archiveStream, Stream dataStream, long actualLength) { dataStream.CopyTo(archiveStream); // The data gets copied from the current position + WritePadding(archiveStream, actualLength); + } + // Calculates the padding for the current entry and writes it after the data. + private static void WritePadding(Stream archiveStream, long actualLength) + { int paddingAfterData = TarHelpers.CalculatePadding(actualLength); if (paddingAfterData != 0) { @@ -623,6 +988,24 @@ private static void WriteData(Stream archiveStream, Stream dataStream, long actu } } + // Calculates the padding for the current entry and asynchronously writes it after the data. + private static ValueTask WritePaddingAsync(Stream archiveStream, long actualLength, CancellationToken cancellationToken) + { + int paddingAfterData = TarHelpers.CalculatePadding(actualLength); + if (paddingAfterData != 0) + { + Debug.Assert(paddingAfterData <= TarHelpers.RecordSize); + + byte[] b = new byte[TarHelpers.RecordSize]; + Memory padding = b.AsMemory(); + padding = padding.Slice(0, paddingAfterData); + + return archiveStream.WriteAsync(padding, cancellationToken); + } + + return ValueTask.CompletedTask; + } + // Asynchronously writes the current header's data stream into the archive stream. private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream, long actualLength, CancellationToken cancellationToken) { From e94fefb8ab8ab4da2f40caf4947abd3df17f554c Mon Sep 17 00:00:00 2001 From: carlossanlop <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 3 Apr 2023 20:18:56 -0600 Subject: [PATCH 08/22] Reuse and simplify the code. --- .../src/System/Formats/Tar/TarHeader.Write.cs | 378 +++++------------- .../src/System/Formats/Tar/TarWriter.cs | 8 +- 2 files changed, 111 insertions(+), 275 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 068ef3f37b5230..88ea62b0a43184 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -25,14 +25,28 @@ internal sealed partial class TarHeader private const string GnuLongMetadataName = "././@LongLink"; private const string ArgNameEntry = "entry"; - // Writes the current header as a V7 entry into the archive stream. - internal void WriteAsV7(Stream archiveStream, Span buffer) + internal void WriteAs(TarEntryFormat format, Stream archiveStream, Span buffer) { + Debug.Assert(format > TarEntryFormat.Unknown && format <= TarEntryFormat.Gnu); Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter { - WriteV7FieldsToBuffer(GetTotalDataBytesToWrite(), buffer); + switch (format) + { + case TarEntryFormat.V7: + WriteV7FieldsToBuffer(GetTotalDataBytesToWrite(), buffer); + break; + case TarEntryFormat.Ustar: + WriteUstarFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); + break; + case TarEntryFormat.Pax: + WritePaxFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); + break; + case TarEntryFormat.Gnu: + WriteGnuFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); + break; + } archiveStream.Write(buffer); @@ -43,56 +57,32 @@ internal void WriteAsV7(Stream archiveStream, Span buffer) } else // seekable archive stream, unseekable data stream { - WriteAsV7WithUnseekableDataStream(archiveStream, buffer); + WriteWithUnseekableStreamAs(format, archiveStream, buffer); } } - // Writes the entry in the order required to be able to obtain the unseekable data stream size. - private void WriteAsV7WithUnseekableDataStream(Stream archiveStream, Span buffer) - { - // When the data stream is unseekable, the order in which we write the entry data changes - Debug.Assert(archiveStream.CanSeek); - Debug.Assert(_dataStream != null); - - // Store the start of the current entry's header, it'll be used later - long headerStartPosition = archiveStream.Position; - - // We know the exact location where the data starts depending on the format - long dataStartPosition = headerStartPosition + FieldLocations.V7Data; - - // Move to the data start location and write the data - archiveStream.Seek(FieldLocations.V7Data, SeekOrigin.Current); - _dataStream.CopyTo(archiveStream); // The data gets copied from the current position - - // Get the new archive stream position, and the difference is the size of the data stream - long dataEndPosition = archiveStream.Position; - long actualLength = dataEndPosition - dataStartPosition; - - // Write the padding now so that we can go back to writing the entry's header metadata - WritePadding(archiveStream, actualLength); - - // Store the end of the current header, we will write the next one after this position - long endOfHeaderPosition = archiveStream.Position; - - // Go back to the start of the entry header to write the rest of the fields - archiveStream.Position = headerStartPosition; - WriteV7FieldsToBuffer(actualLength, buffer); - archiveStream.Write(buffer); - - // Finally, move to the end of the header to continue with the next entry - archiveStream.Position = endOfHeaderPosition; - } - - // Asynchronously writes the current header as a V7 entry into the archive stream and returns the value of the final checksum. - internal async Task WriteAsV7Async(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + internal async Task WriteAsAsync(TarEntryFormat format, Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { - cancellationToken.ThrowIfCancellationRequested(); - + Debug.Assert(format > TarEntryFormat.Unknown && format <= TarEntryFormat.Gnu); Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter { - WriteV7FieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); + switch (format) + { + case TarEntryFormat.V7: + WriteV7FieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); + break; + case TarEntryFormat.Ustar: + WriteUstarFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); + break; + case TarEntryFormat.Pax: + WritePaxFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); + break; + case TarEntryFormat.Gnu: + WriteGnuFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); + break; + } await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); @@ -103,12 +93,11 @@ internal async Task WriteAsV7Async(Stream archiveStream, Memory buffer, Ca } else // seekable archive stream, unseekable data stream { - await WriteAsV7WithUnseekableDataStreamAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + await WriteWithUnseekableDataStreamAsAsync(format, archiveStream, buffer, cancellationToken).ConfigureAwait(false); } } - // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. - private async Task WriteAsV7WithUnseekableDataStreamAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + private void WriteWithUnseekableStreamAs(TarEntryFormat format, Stream archiveStream, Span buffer) { // When the data stream is unseekable, the order in which we write the entry data changes Debug.Assert(archiveStream.CanSeek); @@ -117,67 +106,58 @@ private async Task WriteAsV7WithUnseekableDataStreamAsync(Stream archiveStream, // Store the start of the current entry's header, it'll be used later long headerStartPosition = archiveStream.Position; + ushort dataLocation = format switch + { + TarEntryFormat.V7 => FieldLocations.V7Data, + TarEntryFormat.Ustar or TarEntryFormat.Pax => FieldLocations.PosixData, + TarEntryFormat.Gnu => FieldLocations.GnuData, + _ => throw new ArgumentOutOfRangeException(nameof(format)) + }; + // We know the exact location where the data starts depending on the format - long dataStartPosition = headerStartPosition + FieldLocations.V7Data; + long dataStartPosition = headerStartPosition + dataLocation; // Move to the data start location and write the data - archiveStream.Seek(FieldLocations.V7Data, SeekOrigin.Current); - await _dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position + archiveStream.Seek(dataLocation, SeekOrigin.Current); + _dataStream.CopyTo(archiveStream); // The data gets copied from the current position // Get the new archive stream position, and the difference is the size of the data stream long dataEndPosition = archiveStream.Position; long actualLength = dataEndPosition - dataStartPosition; // Write the padding now so that we can go back to writing the entry's header metadata - await WritePaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); + WritePadding(archiveStream, actualLength); // Store the end of the current header, we will write the next one after this position long endOfHeaderPosition = archiveStream.Position; // Go back to the start of the entry header to write the rest of the fields archiveStream.Position = headerStartPosition; - WriteV7FieldsToBuffer(actualLength, buffer.Span); - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); - - // Finally, move to the end of the header to continue with the next entry - archiveStream.Position = endOfHeaderPosition; - } - // Writes the V7 header fields to the specified buffer, calculates and writes the checksum, then returns the final data length. - private void WriteV7FieldsToBuffer(long size, Span buffer) - { - _size = size; - TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag); - - int tmpChecksum = WriteName(buffer); - tmpChecksum += WriteCommonFields(buffer, actualEntryType); - _checksum = WriteChecksum(tmpChecksum, buffer); - } - - // Writes the current header as a Ustar entry into the archive stream. - internal void WriteAsUstar(Stream archiveStream, Span buffer) - { - Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - - if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + switch (format) { - WriteUstarFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); + case TarEntryFormat.V7: + WriteV7FieldsToBuffer(actualLength, buffer); + break; + case TarEntryFormat.Ustar: + WriteUstarFieldsToBuffer(actualLength, buffer); + break; + case TarEntryFormat.Pax: + WritePaxFieldsToBuffer(actualLength, buffer); + break; + case TarEntryFormat.Gnu: + WriteGnuFieldsToBuffer(actualLength, buffer); + break; + } - archiveStream.Write(buffer); + archiveStream.Write(buffer); - if (_dataStream != null) - { - WriteData(archiveStream, _dataStream, _size); - } - } - else // seekable archive stream, unseekable data stream - { - WriteAsUstarWithUnseekableDataStream(archiveStream, buffer); - } + // Finally, move to the end of the header to continue with the next entry + archiveStream.Position = endOfHeaderPosition; } - // Writes the entry in the order required to be able to obtain the unseekable data stream size. - private void WriteAsUstarWithUnseekableDataStream(Stream archiveStream, Span buffer) + // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. + private async Task WriteWithUnseekableDataStreamAsAsync(TarEntryFormat format, Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { // When the data stream is unseekable, the order in which we write the entry data changes Debug.Assert(archiveStream.CanSeek); @@ -186,53 +166,65 @@ private void WriteAsUstarWithUnseekableDataStream(Stream archiveStream, Span FieldLocations.V7Data, + TarEntryFormat.Ustar or TarEntryFormat.Pax => FieldLocations.PosixData, + TarEntryFormat.Gnu => FieldLocations.GnuData, + _ => throw new ArgumentOutOfRangeException(nameof(format)) + }; + // We know the exact location where the data starts depending on the format - long dataStartPosition = headerStartPosition + FieldLocations.PosixData; + long dataStartPosition = headerStartPosition + dataLocation; // Move to the data start location and write the data - archiveStream.Seek(FieldLocations.PosixData, SeekOrigin.Current); - _dataStream.CopyTo(archiveStream); // The data gets copied from the current position + archiveStream.Seek(dataLocation, SeekOrigin.Current); + await _dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position // Get the new archive stream position, and the difference is the size of the data stream long dataEndPosition = archiveStream.Position; long actualLength = dataEndPosition - dataStartPosition; // Write the padding now so that we can go back to writing the entry's header metadata - WritePadding(archiveStream, actualLength); + await WritePaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); // Store the end of the current header, we will write the next one after this position long endOfHeaderPosition = archiveStream.Position; // Go back to the start of the entry header to write the rest of the fields archiveStream.Position = headerStartPosition; - WriteUstarFieldsToBuffer(actualLength, buffer); - archiveStream.Write(buffer); + + switch (format) + { + case TarEntryFormat.V7: + WriteV7FieldsToBuffer(actualLength, buffer.Span); + break; + case TarEntryFormat.Ustar: + WriteUstarFieldsToBuffer(actualLength, buffer.Span); + break; + case TarEntryFormat.Pax: + WritePaxFieldsToBuffer(actualLength, buffer.Span); + break; + case TarEntryFormat.Gnu: + WriteGnuFieldsToBuffer(actualLength, buffer.Span); + break; + } + + await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); // Finally, move to the end of the header to continue with the next entry archiveStream.Position = endOfHeaderPosition; } - // Asynchronously rites the current header as a Ustar entry into the archive stream and returns the value of the final checksum. - internal async Task WriteAsUstarAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + // Writes the V7 header fields to the specified buffer, calculates and writes the checksum, then returns the final data length. + private void WriteV7FieldsToBuffer(long size, Span buffer) { - cancellationToken.ThrowIfCancellationRequested(); - Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - - if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter - { - WriteUstarFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); - - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); + _size = size; + TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag); - if (_dataStream != null) - { - await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); - } - } - else // seekable archive stream, unseekable data stream - { - await WriteAsUstarWithUnseekableDataStreamAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); - } + int tmpChecksum = WriteName(buffer); + tmpChecksum += WriteCommonFields(buffer, actualEntryType); + _checksum = WriteChecksum(tmpChecksum, buffer); } // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. @@ -416,86 +408,10 @@ private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string } // Writes the current header as a GNU entry into the archive stream. - internal void WriteAsGnuInternal(Stream archiveStream, Span buffer) - { - Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - - if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter - { - WriteGnuFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); - - archiveStream.Write(buffer); - - if (_dataStream != null) - { - WriteData(archiveStream, _dataStream, _size); - } - } - else // seekable archive stream, unseekable data stream - { - WriteAsGnuWithUnseekableDataStream(archiveStream, buffer); - } - } - - // Writes the entry in the order required to be able to obtain the unseekable data stream size. - private void WriteAsGnuWithUnseekableDataStream(Stream archiveStream, Span buffer) - { - // When the data stream is unseekable, the order in which we write the entry data changes - Debug.Assert(archiveStream.CanSeek); - Debug.Assert(_dataStream != null); - - // Store the start of the current entry's header, it'll be used later - long headerStartPosition = archiveStream.Position; - - // We know the exact location where the data starts depending on the format - long dataStartPosition = headerStartPosition + FieldLocations.GnuData; - - // Move to the data start location and write the data - archiveStream.Seek(FieldLocations.GnuData, SeekOrigin.Current); - _dataStream.CopyTo(archiveStream); // The data gets copied from the current position - - // Get the new archive stream position, and the difference is the size of the data stream - long dataEndPosition = archiveStream.Position; - long actualLength = dataEndPosition - dataStartPosition; - - // Write the padding now so that we can go back to writing the entry's header metadata - WritePadding(archiveStream, actualLength); - - // Store the end of the current header, we will write the next one after this position - long endOfHeaderPosition = archiveStream.Position; - - // Go back to the start of the entry header to write the rest of the fields - archiveStream.Position = headerStartPosition; - WriteGnuFieldsToBuffer(actualLength, buffer); - archiveStream.Write(buffer); - - // Finally, move to the end of the header to continue with the next entry - archiveStream.Position = endOfHeaderPosition; - } + internal void WriteAsGnuInternal(Stream archiveStream, Span buffer) => WriteAs(TarEntryFormat.Gnu, archiveStream, buffer); // Asynchronously writes the current header as a GNU entry into the archive stream. - internal async Task WriteAsGnuInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); - Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - - if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter - { - - WriteGnuFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); - - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); - - if (_dataStream != null) - { - await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); - } - } - else // seekable archive stream, unseekable data stream - { - await WriteAsGnuWithUnseekableDataStreamAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); - } - } + internal Task WriteAsGnuInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) => WriteAsAsync(TarEntryFormat.Gnu, archiveStream, buffer, cancellationToken); // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. private async Task WriteAsGnuWithUnseekableDataStreamAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) @@ -579,87 +495,11 @@ private void WriteAsPaxExtendedAttributesShared(bool isGea, int globalExtendedAt // Both the Extended Attributes and Global Extended Attributes entry headers are written in a similar way, just the data changes // This method writes an entry as both entries require, using the data from the current header instance. - private void WriteAsPaxInternal(Stream archiveStream, Span buffer) - { - Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - - if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter - { - WritePaxFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); - - archiveStream.Write(buffer); - - if (_dataStream != null) - { - WriteData(archiveStream, _dataStream, _size); - } - } - else // seekable archive stream, unseekable data stream - { - WriteAsPaxWithUnseekableDataStream(archiveStream, buffer); - } - } - - // Writes the entry in the order required to be able to obtain the unseekable data stream size. - private void WriteAsPaxWithUnseekableDataStream(Stream archiveStream, Span buffer) - { - // When the data stream is unseekable, the order in which we write the entry data changes - Debug.Assert(archiveStream.CanSeek); - Debug.Assert(_dataStream != null); - - // Store the start of the current entry's header, it'll be used later - long headerStartPosition = archiveStream.Position; - - // We know the exact location where the data starts depending on the format - long dataStartPosition = headerStartPosition + FieldLocations.PosixData; - - // Move to the data start location and write the data - archiveStream.Seek(FieldLocations.PosixData, SeekOrigin.Current); - _dataStream.CopyTo(archiveStream); // The data gets copied from the current position - - // Get the new archive stream position, and the difference is the size of the data stream - long dataEndPosition = archiveStream.Position; - long actualLength = dataEndPosition - dataStartPosition; - - // Write the padding now so that we can go back to writing the entry's header metadata - WritePadding(archiveStream, actualLength); - - // Store the end of the current header, we will write the next one after this position - long endOfHeaderPosition = archiveStream.Position; - - // Go back to the start of the entry header to write the rest of the fields - archiveStream.Position = headerStartPosition; - WritePaxFieldsToBuffer(actualLength, buffer); - archiveStream.Write(buffer); - - // Finally, move to the end of the header to continue with the next entry - archiveStream.Position = endOfHeaderPosition; - } + private void WriteAsPaxInternal(Stream archiveStream, Span buffer) => WriteAs(TarEntryFormat.Pax, archiveStream, buffer); // Both the Extended Attributes and Global Extended Attributes entry headers are written in a similar way, just the data changes // This method asynchronously writes an entry as both entries require, using the data from the current header instance. - private async Task WriteAsPaxInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); - - Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - - if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter - { - WritePaxFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); - - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); - - if (_dataStream != null) - { - await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); - } - } - else // seekable archive stream, unseekable data stream - { - await WriteAsPaxWithUnseekableDataStreamAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); - } - } + private Task WriteAsPaxInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) => WriteAsAsync(TarEntryFormat.Pax, archiveStream, buffer, cancellationToken); // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. private async Task WriteAsPaxWithUnseekableDataStreamAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs index df77341be9a039..7e744ceb060f03 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs @@ -283,12 +283,8 @@ private void WriteEntryInternal(TarEntry entry) switch (entry.Format) { - case TarEntryFormat.V7: - entry._header.WriteAsV7(_archiveStream, buffer); - break; - - case TarEntryFormat.Ustar: - entry._header.WriteAsUstar(_archiveStream, buffer); + case TarEntryFormat.V7 or TarEntryFormat.Ustar: + entry._header.WriteAs(entry.Format, _archiveStream, buffer); break; case TarEntryFormat.Pax: From 15523acb9d2a9eca3bcedab198c77bc91dce8e02 Mon Sep 17 00:00:00 2001 From: carlossanlop <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 3 Apr 2023 20:39:43 -0600 Subject: [PATCH 09/22] More reuse, remove unused and not needed. --- .../src/System/Formats/Tar/TarHeader.Write.cs | 106 ++---------------- .../src/System/Formats/Tar/TarWriter.cs | 3 +- 2 files changed, 11 insertions(+), 98 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 88ea62b0a43184..b212d23e507d1d 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -317,7 +317,7 @@ internal void WriteAsPax(Stream archiveStream, Span buffer) extendedAttributesHeader.WriteAsPaxExtendedAttributes(archiveStream, buffer, ExtendedAttributes, isGea: false, globalExtendedAttributesEntryNumber: -1); buffer.Clear(); // Reset it to reuse it // Second, we write this header as a normal one - WriteAsPaxInternal(archiveStream, buffer); + WriteAs(TarEntryFormat.Pax, archiveStream, buffer); } // Asynchronously writes the current header as a PAX entry into the archive stream. @@ -336,7 +336,7 @@ internal async Task WriteAsPaxAsync(Stream archiveStream, Memory buffer, C buffer.Span.Clear(); // Reset it to reuse it // Second, we write this header as a normal one - await WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + await WriteAsAsync(TarEntryFormat.Pax, archiveStream, buffer, cancellationToken).ConfigureAwait(false); } // Writes the current header as a Gnu entry into the archive stream. @@ -347,7 +347,7 @@ internal void WriteAsGnu(Stream archiveStream, Span buffer) if (_linkName != null && Encoding.UTF8.GetByteCount(_linkName) > FieldLengths.LinkName) { TarHeader longLinkHeader = GetGnuLongMetadataHeader(TarEntryType.LongLink, _linkName); - longLinkHeader.WriteAsGnuInternal(archiveStream, buffer); + longLinkHeader.WriteAs(TarEntryFormat.Gnu, archiveStream, buffer); buffer.Clear(); // Reset it to reuse it } @@ -355,12 +355,12 @@ internal void WriteAsGnu(Stream archiveStream, Span buffer) if (Encoding.UTF8.GetByteCount(_name) > FieldLengths.Name) { TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name); - longPathHeader.WriteAsGnuInternal(archiveStream, buffer); + longPathHeader.WriteAs(TarEntryFormat.Gnu, archiveStream, buffer); buffer.Clear(); // Reset it to reuse it } // Third, we write this header as a normal one - WriteAsGnuInternal(archiveStream, buffer); + WriteAs(TarEntryFormat.Gnu, archiveStream, buffer); } // Writes the current header as a Gnu entry into the archive stream. @@ -373,7 +373,7 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, C if (_linkName != null && Encoding.UTF8.GetByteCount(_linkName) > FieldLengths.LinkName) { TarHeader longLinkHeader = GetGnuLongMetadataHeader(TarEntryType.LongLink, _linkName); - await longLinkHeader.WriteAsGnuInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + await longLinkHeader.WriteAsAsync(TarEntryFormat.Gnu, archiveStream, buffer, cancellationToken).ConfigureAwait(false); buffer.Span.Clear(); // Reset it to reuse it } @@ -381,12 +381,12 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, C if (Encoding.UTF8.GetByteCount(_name) > FieldLengths.Name) { TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name); - await longPathHeader.WriteAsGnuInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + await longPathHeader.WriteAsAsync(TarEntryFormat.Gnu, archiveStream, buffer, cancellationToken).ConfigureAwait(false); buffer.Span.Clear(); // Reset it to reuse it } // Third, we write this header as a normal one - await WriteAsGnuInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + await WriteAsAsync(TarEntryFormat.Gnu, archiveStream, buffer, cancellationToken).ConfigureAwait(false); } // Creates and returns a GNU long metadata header, with the specified long text written into its data stream. @@ -407,48 +407,6 @@ private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string return longMetadataHeader; } - // Writes the current header as a GNU entry into the archive stream. - internal void WriteAsGnuInternal(Stream archiveStream, Span buffer) => WriteAs(TarEntryFormat.Gnu, archiveStream, buffer); - - // Asynchronously writes the current header as a GNU entry into the archive stream. - internal Task WriteAsGnuInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) => WriteAsAsync(TarEntryFormat.Gnu, archiveStream, buffer, cancellationToken); - - // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. - private async Task WriteAsGnuWithUnseekableDataStreamAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) - { - // When the data stream is unseekable, the order in which we write the entry data changes - Debug.Assert(archiveStream.CanSeek); - Debug.Assert(_dataStream != null); - - // Store the start of the current entry's header, it'll be used later - long headerStartPosition = archiveStream.Position; - - // We know the exact location where the data starts depending on the format - long dataStartPosition = headerStartPosition + FieldLocations.GnuData; - - // Move to the data start location and write the data - archiveStream.Seek(FieldLocations.GnuData, SeekOrigin.Current); - await _dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position - - // Get the new archive stream position, and the difference is the size of the data stream - long dataEndPosition = archiveStream.Position; - long actualLength = dataEndPosition - dataStartPosition; - - // Write the padding now so that we can go back to writing the entry's header metadata - await WritePaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); - - // Store the end of the current header, we will write the next one after this position - long endOfHeaderPosition = archiveStream.Position; - - // Go back to the start of the entry header to write the rest of the fields - archiveStream.Position = headerStartPosition; - WriteGnuFieldsToBuffer(actualLength, buffer.Span); - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); - - // Finally, move to the end of the header to continue with the next entry - archiveStream.Position = endOfHeaderPosition; - } - // Shared checksum and data length calculations for GNU entry writing. private void WriteGnuFieldsToBuffer(long size, Span buffer) { @@ -467,7 +425,7 @@ private void WriteGnuFieldsToBuffer(long size, Span buffer) private void WriteAsPaxExtendedAttributes(Stream archiveStream, Span buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber) { WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber, extendedAttributes); - WriteAsPaxInternal(archiveStream, buffer); + WriteAs(TarEntryFormat.Pax, archiveStream, buffer); } // Asynchronously writes the current header as a PAX Extended Attributes entry into the archive stream and returns the value of the final checksum. @@ -475,7 +433,7 @@ private Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer) => WriteAs(TarEntryFormat.Pax, archiveStream, buffer); - - // Both the Extended Attributes and Global Extended Attributes entry headers are written in a similar way, just the data changes - // This method asynchronously writes an entry as both entries require, using the data from the current header instance. - private Task WriteAsPaxInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) => WriteAsAsync(TarEntryFormat.Pax, archiveStream, buffer, cancellationToken); - - // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. - private async Task WriteAsPaxWithUnseekableDataStreamAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) - { - // When the data stream is unseekable, the order in which we write the entry data changes - Debug.Assert(archiveStream.CanSeek); - Debug.Assert(_dataStream != null); - - // Store the start of the current entry's header, it'll be used later - long headerStartPosition = archiveStream.Position; - - // We know the exact location where the data starts depending on the format - long dataStartPosition = headerStartPosition + FieldLocations.PosixData; - - // Move to the data start location and write the data - archiveStream.Seek(FieldLocations.PosixData, SeekOrigin.Current); - await _dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position - - // Get the new archive stream position, and the difference is the size of the data stream - long dataEndPosition = archiveStream.Position; - long actualLength = dataEndPosition - dataStartPosition; - - // Write the padding now so that we can go back to writing the entry's header metadata - await WritePaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); - - // Store the end of the current header, we will write the next one after this position - long endOfHeaderPosition = archiveStream.Position; - - // Go back to the start of the entry header to write the rest of the fields - archiveStream.Position = headerStartPosition; - WritePaxFieldsToBuffer(actualLength, buffer.Span); - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); - - // Finally, move to the end of the header to continue with the next entry - archiveStream.Position = endOfHeaderPosition; - } - // Shared checksum and data length calculations for PAX entry writing. private void WritePaxFieldsToBuffer(long size, Span buffer) { diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs index 7e744ceb060f03..13654eaa7279e6 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs @@ -321,8 +321,7 @@ private async Task WriteEntryAsyncInternal(TarEntry entry, CancellationToken can Task task = entry.Format switch { - TarEntryFormat.V7 => entry._header.WriteAsV7Async(_archiveStream, buffer, cancellationToken), - TarEntryFormat.Ustar => entry._header.WriteAsUstarAsync(_archiveStream, buffer, cancellationToken), + TarEntryFormat.V7 or TarEntryFormat.Ustar => entry._header.WriteAsAsync(entry.Format, _archiveStream, buffer, cancellationToken), TarEntryFormat.Pax when entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes => entry._header.WriteAsPaxGlobalExtendedAttributesAsync(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++, cancellationToken), TarEntryFormat.Pax => entry._header.WriteAsPaxAsync(_archiveStream, buffer, cancellationToken), TarEntryFormat.Gnu => entry._header.WriteAsGnuAsync(_archiveStream, buffer, cancellationToken), From 5979be937ee744b814f0889693394c229560ccea Mon Sep 17 00:00:00 2001 From: carlossanlop <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 4 Apr 2023 09:32:20 -0600 Subject: [PATCH 10/22] Remove TarFile.CreateFromDirectoryAsync.File.Roundtrip.cs. Submit it in a separate PR. --- .../System.Formats.Tar/tests/System.Formats.Tar.Tests.csproj | 1 - 1 file changed, 1 deletion(-) diff --git a/src/libraries/System.Formats.Tar/tests/System.Formats.Tar.Tests.csproj b/src/libraries/System.Formats.Tar/tests/System.Formats.Tar.Tests.csproj index cffabe715ed098..ac75d0d7416db3 100644 --- a/src/libraries/System.Formats.Tar/tests/System.Formats.Tar.Tests.csproj +++ b/src/libraries/System.Formats.Tar/tests/System.Formats.Tar.Tests.csproj @@ -18,7 +18,6 @@ - From b702a44b4af71863e381782cbe00a09a65d27087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 23 May 2023 13:53:48 -0700 Subject: [PATCH 11/22] Remove unnecessary resx comments. --- .../src/Resources/Strings.resx | 59 ------------------- 1 file changed, 59 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index b86d4b287384a1..1c5a56c4da1496 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -1,64 +1,5 @@  - From 625a6193c8d1f9ee4e2dbac8ec844589f7da6e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 23 May 2023 14:26:12 -0700 Subject: [PATCH 12/22] Dedicated method for writing fields to buffer depending on the format. --- .../src/System/Formats/Tar/TarHeader.Write.cs | 90 ++++++------------- 1 file changed, 26 insertions(+), 64 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index b212d23e507d1d..aa0862d966aaf8 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -32,22 +32,8 @@ internal void WriteAs(TarEntryFormat format, Stream archiveStream, Span bu if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter { - switch (format) - { - case TarEntryFormat.V7: - WriteV7FieldsToBuffer(GetTotalDataBytesToWrite(), buffer); - break; - case TarEntryFormat.Ustar: - WriteUstarFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); - break; - case TarEntryFormat.Pax: - WritePaxFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); - break; - case TarEntryFormat.Gnu: - WriteGnuFieldsToBuffer(GetTotalDataBytesToWrite(), buffer); - break; - } - + long bytesToWrite = GetTotalDataBytesToWrite(); + WriteFieldsToBuffer(format, bytesToWrite, buffer); archiveStream.Write(buffer); if (_dataStream != null) @@ -68,22 +54,8 @@ internal async Task WriteAsAsync(TarEntryFormat format, Stream archiveStream, Me if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter { - switch (format) - { - case TarEntryFormat.V7: - WriteV7FieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); - break; - case TarEntryFormat.Ustar: - WriteUstarFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); - break; - case TarEntryFormat.Pax: - WritePaxFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); - break; - case TarEntryFormat.Gnu: - WriteGnuFieldsToBuffer(GetTotalDataBytesToWrite(), buffer.Span); - break; - } - + long bytesToWrite = GetTotalDataBytesToWrite(); + WriteFieldsToBuffer(format, bytesToWrite, buffer.Span); await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); if (_dataStream != null) @@ -134,22 +106,7 @@ private void WriteWithUnseekableStreamAs(TarEntryFormat format, Stream archiveSt // Go back to the start of the entry header to write the rest of the fields archiveStream.Position = headerStartPosition; - switch (format) - { - case TarEntryFormat.V7: - WriteV7FieldsToBuffer(actualLength, buffer); - break; - case TarEntryFormat.Ustar: - WriteUstarFieldsToBuffer(actualLength, buffer); - break; - case TarEntryFormat.Pax: - WritePaxFieldsToBuffer(actualLength, buffer); - break; - case TarEntryFormat.Gnu: - WriteGnuFieldsToBuffer(actualLength, buffer); - break; - } - + WriteFieldsToBuffer(format, actualLength, buffer); archiveStream.Write(buffer); // Finally, move to the end of the header to continue with the next entry @@ -194,22 +151,7 @@ private async Task WriteWithUnseekableDataStreamAsAsync(TarEntryFormat format, S // Go back to the start of the entry header to write the rest of the fields archiveStream.Position = headerStartPosition; - switch (format) - { - case TarEntryFormat.V7: - WriteV7FieldsToBuffer(actualLength, buffer.Span); - break; - case TarEntryFormat.Ustar: - WriteUstarFieldsToBuffer(actualLength, buffer.Span); - break; - case TarEntryFormat.Pax: - WritePaxFieldsToBuffer(actualLength, buffer.Span); - break; - case TarEntryFormat.Gnu: - WriteGnuFieldsToBuffer(actualLength, buffer.Span); - break; - } - + WriteFieldsToBuffer(format, actualLength, buffer.Span); await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); // Finally, move to the end of the header to continue with the next entry @@ -463,6 +405,26 @@ private void WritePaxFieldsToBuffer(long size, Span buffer) _checksum = WriteChecksum(tmpChecksum, buffer); } + // Writes the format-specific fields of the current entry, as well as the entry data length, into the specified buffer. + private void WriteFieldsToBuffer(TarEntryFormat format, long bytesToWrite, Span buffer) + { + switch (format) + { + case TarEntryFormat.V7: + WriteV7FieldsToBuffer(bytesToWrite, buffer); + break; + case TarEntryFormat.Ustar: + WriteUstarFieldsToBuffer(bytesToWrite, buffer); + break; + case TarEntryFormat.Pax: + WritePaxFieldsToBuffer(bytesToWrite, buffer); + break; + case TarEntryFormat.Gnu: + WriteGnuFieldsToBuffer(bytesToWrite, buffer); + break; + } + } + // Gnu and pax save in the name byte array only the UTF8 bytes that fit. // V7 does not support more than 100 bytes so it throws. private int WriteName(Span buffer) From 4f702c4625f2eaae16f38956f563d3be2879eaa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 23 May 2023 15:03:06 -0700 Subject: [PATCH 13/22] Specify `Data` in name of method that expects unseekable data stream. Add extra debug asserts. --- .../src/System/Formats/Tar/TarHeader.Write.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index aa0862d966aaf8..b9ed7033ef9559 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -43,7 +43,7 @@ internal void WriteAs(TarEntryFormat format, Stream archiveStream, Span bu } else // seekable archive stream, unseekable data stream { - WriteWithUnseekableStreamAs(format, archiveStream, buffer); + WriteWithUnseekableDataStreamAs(format, archiveStream, buffer); } } @@ -69,11 +69,12 @@ internal async Task WriteAsAsync(TarEntryFormat format, Stream archiveStream, Me } } - private void WriteWithUnseekableStreamAs(TarEntryFormat format, Stream archiveStream, Span buffer) + private void WriteWithUnseekableDataStreamAs(TarEntryFormat format, Stream archiveStream, Span buffer) { // When the data stream is unseekable, the order in which we write the entry data changes Debug.Assert(archiveStream.CanSeek); Debug.Assert(_dataStream != null); + Debug.Assert(!_dataStream.CanSeek); // Store the start of the current entry's header, it'll be used later long headerStartPosition = archiveStream.Position; @@ -119,6 +120,7 @@ private async Task WriteWithUnseekableDataStreamAsAsync(TarEntryFormat format, S // When the data stream is unseekable, the order in which we write the entry data changes Debug.Assert(archiveStream.CanSeek); Debug.Assert(_dataStream != null); + Debug.Assert(!_dataStream.CanSeek); // Store the start of the current entry's header, it'll be used later long headerStartPosition = archiveStream.Position; From 0054f990c495f69286bb8809bb44aab45aa0107b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 23 May 2023 15:22:18 -0700 Subject: [PATCH 14/22] Delete unnecessary method. --- .../src/System/Formats/Tar/TarHeader.Write.cs | 36 ------------------- 1 file changed, 36 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index b9ed7033ef9559..75a446f834b507 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -171,42 +171,6 @@ private void WriteV7FieldsToBuffer(long size, Span buffer) _checksum = WriteChecksum(tmpChecksum, buffer); } - // Asynchronously writes the entry in the order required to be able to obtain the unseekable data stream size. - private async Task WriteAsUstarWithUnseekableDataStreamAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) - { - // When the data stream is unseekable, the order in which we write the entry data changes - Debug.Assert(archiveStream.CanSeek); - Debug.Assert(_dataStream != null); - - // Store the start of the current entry's header, it'll be used later - long headerStartPosition = archiveStream.Position; - - // We know the exact location where the data starts depending on the format - long dataStartPosition = headerStartPosition + FieldLocations.PosixData; - - // Move to the data start location and write the data - archiveStream.Seek(FieldLocations.PosixData, SeekOrigin.Current); - await _dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position - - // Get the new archive stream position, and the difference is the size of the data stream - long dataEndPosition = archiveStream.Position; - long actualLength = dataEndPosition - dataStartPosition; - - // Write the padding now so that we can go back to writing the entry's header metadata - await WritePaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); - - // Store the end of the current header, we will write the next one after this position - long endOfHeaderPosition = archiveStream.Position; - - // Go back to the start of the entry header to write the rest of the fields - archiveStream.Position = headerStartPosition; - WriteUstarFieldsToBuffer(actualLength, buffer.Span); - await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); - - // Finally, move to the end of the header to continue with the next entry - archiveStream.Position = endOfHeaderPosition; - } - // Writes the Ustar header fields to the specified buffer, calculates and writes the checksum, then returns the final data length. private void WriteUstarFieldsToBuffer(long size, Span buffer) { From 2fc23de0ec35abe643a83a6dba5f83bf83f7c518 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 23 May 2023 15:31:54 -0700 Subject: [PATCH 15/22] Rename WritePadding to WriteEmptyPadding --- .../src/System/Formats/Tar/TarHeader.Write.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 75a446f834b507..949ae859413a3c 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -99,7 +99,7 @@ private void WriteWithUnseekableDataStreamAs(TarEntryFormat format, Stream archi long actualLength = dataEndPosition - dataStartPosition; // Write the padding now so that we can go back to writing the entry's header metadata - WritePadding(archiveStream, actualLength); + WriteEmptyPadding(archiveStream, actualLength); // Store the end of the current header, we will write the next one after this position long endOfHeaderPosition = archiveStream.Position; @@ -145,7 +145,7 @@ private async Task WriteWithUnseekableDataStreamAsAsync(TarEntryFormat format, S long actualLength = dataEndPosition - dataStartPosition; // Write the padding now so that we can go back to writing the entry's header metadata - await WritePaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); + await WriteEmptyPaddingAsync(archiveStream, actualLength, cancellationToken).ConfigureAwait(false); // Store the end of the current header, we will write the next one after this position long endOfHeaderPosition = archiveStream.Position; @@ -651,11 +651,11 @@ private int WriteGnuFields(Span buffer) private static void WriteData(Stream archiveStream, Stream dataStream, long actualLength) { dataStream.CopyTo(archiveStream); // The data gets copied from the current position - WritePadding(archiveStream, actualLength); + WriteEmptyPadding(archiveStream, actualLength); } // Calculates the padding for the current entry and writes it after the data. - private static void WritePadding(Stream archiveStream, long actualLength) + private static void WriteEmptyPadding(Stream archiveStream, long actualLength) { int paddingAfterData = TarHelpers.CalculatePadding(actualLength); if (paddingAfterData != 0) @@ -671,7 +671,7 @@ private static void WritePadding(Stream archiveStream, long actualLength) } // Calculates the padding for the current entry and asynchronously writes it after the data. - private static ValueTask WritePaddingAsync(Stream archiveStream, long actualLength, CancellationToken cancellationToken) + private static ValueTask WriteEmptyPaddingAsync(Stream archiveStream, long actualLength, CancellationToken cancellationToken) { int paddingAfterData = TarHelpers.CalculatePadding(actualLength); if (paddingAfterData != 0) From d81b5a834b46741544fd15f0c16f1926f1f5ceac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 23 May 2023 20:22:55 -0700 Subject: [PATCH 16/22] Rename test variables --- .../TarFile.ExtractToDirectory.Stream.Tests.cs | 14 +++++++------- ...TarFile.ExtractToDirectoryAsync.Stream.Tests.cs | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs index 6120820e1992d5..bf3cec4fcb0832 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs @@ -230,8 +230,8 @@ public void UnseekableStreams_RoundTrip(TestTarFormat testFormat) // Size of files in many_small_files.tar are expected to be tiny and all equal int bufferLength = 1024; - byte[] buffer1 = new byte[bufferLength]; - byte[] buffer2 = new byte[bufferLength]; + byte[] fileContent = new byte[bufferLength]; + byte[] dataStreamContent = new byte[bufferLength]; TarEntry entry = reader.GetNextEntry(); do { @@ -253,13 +253,13 @@ public void UnseekableStreams_RoundTrip(TestTarFormat testFormat) Assert.Equal(fileData.Length, entry.Length); - Array.Clear(buffer1); - Array.Clear(buffer2); + Array.Clear(fileContent); + Array.Clear(dataStreamContent); - fileData.ReadExactly(buffer1, 0, (int)entry.Length); - entry.DataStream.ReadExactly(buffer2, 0, (int)entry.Length); + fileData.ReadExactly(fileContent, 0, (int)entry.Length); + entry.DataStream.ReadExactly(dataStreamContent, 0, (int)entry.Length); - AssertExtensions.SequenceEqual(buffer1, buffer2); + AssertExtensions.SequenceEqual(fileContent, dataStreamContent); } } while ((entry = reader.GetNextEntry()) != null); diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs index f50ead6ff11ac8..307a165099f4fe 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs @@ -295,8 +295,8 @@ public async Task UnseekableStreams_RoundTrip_Async(TestTarFormat testFormat) // Size of files in many_small_files.tar are expected to be tiny and all equal int bufferLength = 1024; - byte[] buffer1 = new byte[bufferLength]; - byte[] buffer2 = new byte[bufferLength]; + byte[] fileContent = new byte[bufferLength]; + byte[] dataStreamContent = new byte[bufferLength]; TarEntry entry = await reader.GetNextEntryAsync(); do { @@ -318,13 +318,13 @@ public async Task UnseekableStreams_RoundTrip_Async(TestTarFormat testFormat) Assert.Equal(fileData.Length, entry.Length); - Array.Clear(buffer1); - Array.Clear(buffer2); + Array.Clear(fileContent); + Array.Clear(dataStreamContent); - await fileData.ReadExactlyAsync(buffer1, 0, (int)entry.Length); - await entry.DataStream.ReadExactlyAsync(buffer2, 0, (int)entry.Length); + await fileData.ReadExactlyAsync(fileContent, 0, (int)entry.Length); + await entry.DataStream.ReadExactlyAsync(dataStreamContent, 0, (int)entry.Length); - AssertExtensions.SequenceEqual(buffer1, buffer2); + AssertExtensions.SequenceEqual(fileContent, dataStreamContent); } } while ((entry = await reader.GetNextEntryAsync()) != null); From 796d542f88aeb1a8f5a9552fbf7df89e1b7adffe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 23 May 2023 20:25:25 -0700 Subject: [PATCH 17/22] Merge identical test arrays into one --- .../tests/TarWriter/TarWriter.WriteEntry.Tests.cs | 12 ++++++------ .../TarWriter/TarWriter.WriteEntryAsync.Tests.cs | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Tests.cs index 20a7ba1b180cdc..487c6ab04dbcd5 100644 --- a/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntry.Tests.cs @@ -554,9 +554,10 @@ public void WritingUnseekableDataStream_To_UnseekableArchiveStream_Throws(TarEnt [InlineData(TarEntryFormat.Gnu)] public void Write_TwoEntries_With_UnseekableDataStreams(TarEntryFormat entryFormat) { + byte[] expectedBytes = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }; + using MemoryStream internalDataStream1 = new(); - byte[] expectedBytes1 = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }; - internalDataStream1.Write(expectedBytes1.AsSpan()); + internalDataStream1.Write(expectedBytes.AsSpan()); internalDataStream1.Position = 0; TarEntryType fileEntryType = GetTarEntryTypeForTarEntryFormat(TarEntryType.RegularFile, entryFormat); @@ -566,8 +567,7 @@ public void Write_TwoEntries_With_UnseekableDataStreams(TarEntryFormat entryForm entry1.DataStream = unseekableDataStream1; using MemoryStream internalDataStream2 = new(); - byte[] expectedBytes2 = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }; - internalDataStream2.Write(expectedBytes2.AsSpan()); + internalDataStream2.Write(expectedBytes.AsSpan()); internalDataStream2.Position = 0; using WrappedStream unseekableDataStream2 = new(internalDataStream2, canRead: true, canWrite: false, canSeek: false); @@ -589,12 +589,12 @@ public void Write_TwoEntries_With_UnseekableDataStreams(TarEntryFormat entryForm TarEntry readEntry = reader.GetNextEntry(); Assert.NotNull(readEntry); readEntry.DataStream.ReadExactly(actualBytes); - Assert.Equal(expectedBytes1, actualBytes); + Assert.Equal(expectedBytes, actualBytes); readEntry = reader.GetNextEntry(); Assert.NotNull(readEntry); readEntry.DataStream.ReadExactly(actualBytes); - Assert.Equal(expectedBytes2, actualBytes); + Assert.Equal(expectedBytes, actualBytes); Assert.Null(reader.GetNextEntry()); } diff --git a/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntryAsync.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntryAsync.Tests.cs index 1274572f0009f1..45b26ad66e6005 100644 --- a/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntryAsync.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarWriter/TarWriter.WriteEntryAsync.Tests.cs @@ -473,9 +473,10 @@ public async Task WritingUnseekableDataStream_To_UnseekableArchiveStream_Throws_ [InlineData(TarEntryFormat.Gnu)] public async Task Write_TwoEntries_With_UnseekableDataStreams_Async(TarEntryFormat entryFormat) { + byte[] expectedBytes = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }; + await using MemoryStream internalDataStream1 = new(); - byte[] expectedBytes1 = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }; - await internalDataStream1.WriteAsync(expectedBytes1.AsMemory()); + await internalDataStream1.WriteAsync(expectedBytes.AsMemory()); internalDataStream1.Position = 0; TarEntryType fileEntryType = GetTarEntryTypeForTarEntryFormat(TarEntryType.RegularFile, entryFormat); @@ -485,8 +486,7 @@ public async Task Write_TwoEntries_With_UnseekableDataStreams_Async(TarEntryForm entry1.DataStream = unseekableDataStream1; await using MemoryStream internalDataStream2 = new(); - byte[] expectedBytes2 = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }; - await internalDataStream2.WriteAsync(expectedBytes2.AsMemory()); + await internalDataStream2.WriteAsync(expectedBytes.AsMemory()); internalDataStream2.Position = 0; await using WrappedStream unseekableDataStream2 = new(internalDataStream2, canRead: true, canWrite: false, canSeek: false); @@ -508,12 +508,12 @@ public async Task Write_TwoEntries_With_UnseekableDataStreams_Async(TarEntryForm TarEntry readEntry = await reader.GetNextEntryAsync(); Assert.NotNull(readEntry); await readEntry.DataStream.ReadExactlyAsync(actualBytes); - Assert.Equal(expectedBytes1, actualBytes); + Assert.Equal(expectedBytes, actualBytes); readEntry = await reader.GetNextEntryAsync(); Assert.NotNull(readEntry); await readEntry.DataStream.ReadExactlyAsync(actualBytes); - Assert.Equal(expectedBytes2, actualBytes); + Assert.Equal(expectedBytes, actualBytes); Assert.Null(await reader.GetNextEntryAsync()); } From 94157b0257ae529a301fd4570322170a4261e1cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Wed, 24 May 2023 13:01:04 -0600 Subject: [PATCH 18/22] Invert if else to be more clear about conditions --- .../src/System/Formats/Tar/TarHeader.Write.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 949ae859413a3c..ddc3c822a24587 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -30,7 +30,11 @@ internal void WriteAs(TarEntryFormat format, Stream archiveStream, Span bu Debug.Assert(format > TarEntryFormat.Unknown && format <= TarEntryFormat.Gnu); Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + if (archiveStream.CanSeek && _dataStream is { CanSeek: false }) + { + WriteWithUnseekableDataStreamAs(format, archiveStream, buffer); + } + else // Seek status of archive does not matter { long bytesToWrite = GetTotalDataBytesToWrite(); WriteFieldsToBuffer(format, bytesToWrite, buffer); @@ -41,10 +45,6 @@ internal void WriteAs(TarEntryFormat format, Stream archiveStream, Span bu WriteData(archiveStream, _dataStream, _size); } } - else // seekable archive stream, unseekable data stream - { - WriteWithUnseekableDataStreamAs(format, archiveStream, buffer); - } } internal async Task WriteAsAsync(TarEntryFormat format, Stream archiveStream, Memory buffer, CancellationToken cancellationToken) @@ -52,7 +52,11 @@ internal async Task WriteAsAsync(TarEntryFormat format, Stream archiveStream, Me Debug.Assert(format > TarEntryFormat.Unknown && format <= TarEntryFormat.Gnu); Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek); - if (_dataStream == null || _dataStream.CanSeek) // seek status of archive does not matter + if (archiveStream.CanSeek && _dataStream is { CanSeek: false }) + { + await WriteWithUnseekableDataStreamAsAsync(format, archiveStream, buffer, cancellationToken).ConfigureAwait(false); + } + else // seek status of archive does not matter { long bytesToWrite = GetTotalDataBytesToWrite(); WriteFieldsToBuffer(format, bytesToWrite, buffer.Span); @@ -63,10 +67,6 @@ internal async Task WriteAsAsync(TarEntryFormat format, Stream archiveStream, Me await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false); } } - else // seekable archive stream, unseekable data stream - { - await WriteWithUnseekableDataStreamAsAsync(format, archiveStream, buffer, cancellationToken).ConfigureAwait(false); - } } private void WriteWithUnseekableDataStreamAs(TarEntryFormat format, Stream archiveStream, Span buffer) From 155788537a679cb50d9ce3161716c1f3bdff5b39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Wed, 24 May 2023 13:03:13 -0600 Subject: [PATCH 19/22] remove size assign comment --- .../System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index ddc3c822a24587..53d07b47e0b80f 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -355,7 +355,6 @@ private void WriteAsPaxExtendedAttributesShared(bool isGea, int globalExtendedAt GenerateExtendedAttributeName(); _mode = TarHelpers.GetDefaultMode(_typeFlag); - //_size = GetTotalDataBytesToWrite(); _typeFlag = isGea ? TarEntryType.GlobalExtendedAttributes : TarEntryType.ExtendedAttributes; } From 943988bc0086f9220fcc23e6a17b8903499e01bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Wed, 24 May 2023 14:06:44 -0600 Subject: [PATCH 20/22] Remove redundant debug assert --- .../System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 53d07b47e0b80f..bdc9d76d4df6af 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -554,7 +554,6 @@ private long GetTotalDataBytesToWrite() { return 0; } - Debug.Assert(_dataStream.CanSeek); long length = _dataStream.Length; long position = _dataStream.Position; From abff5c02908d7d719970381fb17972ab3a372e62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Wed, 24 May 2023 14:10:09 -0600 Subject: [PATCH 21/22] Async padding byte array creation simplification --- .../src/System/Formats/Tar/TarHeader.Write.cs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index bdc9d76d4df6af..fa50f89ca13932 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -660,11 +660,11 @@ private static void WriteEmptyPadding(Stream archiveStream, long actualLength) { Debug.Assert(paddingAfterData <= TarHelpers.RecordSize); - Span padding = stackalloc byte[TarHelpers.RecordSize]; - padding = padding.Slice(0, paddingAfterData); - padding.Clear(); + Span zeros = stackalloc byte[TarHelpers.RecordSize]; + zeros = zeros.Slice(0, paddingAfterData); + zeros.Clear(); - archiveStream.Write(padding); + archiveStream.Write(zeros); } } @@ -676,10 +676,7 @@ private static ValueTask WriteEmptyPaddingAsync(Stream archiveStream, long actua { Debug.Assert(paddingAfterData <= TarHelpers.RecordSize); - byte[] b = new byte[TarHelpers.RecordSize]; - Memory padding = b.AsMemory(); - padding = padding.Slice(0, paddingAfterData); - + byte[] zeros = new byte[paddingAfterData]; return archiveStream.WriteAsync(padding, cancellationToken); } From 66dc094f2adbabf46182e497309b7a05c907efc3 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 25 May 2023 09:21:45 +0200 Subject: [PATCH 22/22] Apply suggestions from code review --- .../src/System/Formats/Tar/TarHeader.Write.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index fa50f89ca13932..5d9d3a54431906 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -677,7 +677,7 @@ private static ValueTask WriteEmptyPaddingAsync(Stream archiveStream, long actua Debug.Assert(paddingAfterData <= TarHelpers.RecordSize); byte[] zeros = new byte[paddingAfterData]; - return archiveStream.WriteAsync(padding, cancellationToken); + return archiveStream.WriteAsync(zeros, cancellationToken); } return ValueTask.CompletedTask;