diff --git a/WaybackDownloader/DefaultCommand.cs b/WaybackDownloader/DefaultCommand.cs index 251846d..e0882e1 100644 --- a/WaybackDownloader/DefaultCommand.cs +++ b/WaybackDownloader/DefaultCommand.cs @@ -2,7 +2,6 @@ using Microsoft.Extensions.DependencyInjection; using Spectre.Console; using Spectre.Console.Cli; -using WaybackDownloader.Logging; using WaybackDownloader.Services; using WaybackDownloader.Spectre; using Settings = WaybackDownloader.DefaultCommand.Settings; @@ -21,10 +20,12 @@ public override async Task ExecuteAsync(CommandContext context, Settings se PrintSettings(settings); ServiceProvider? serviceProvider = null; - Task? pageWokerRunnerTask = null; + Task? pageWorkerRunnerTask = null; + Task? uiTask = null; - using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); - cts.Token.Register(() => AnsiConsole.WriteLine("Shutting down...")); + cancellationToken.Register(() => AnsiConsole.WriteLine("Shutting down...")); + using var workerCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + using var uiCts = new CancellationTokenSource(); try { @@ -58,10 +59,9 @@ public override async Task ExecuteAsync(CommandContext context, Settings se var downloaderService = serviceProvider.GetRequiredService(); var pageWorkerRunner = serviceProvider.GetRequiredService(); - var loggingMessagesAccessor = serviceProvider.GetRequiredService(); var ui = serviceProvider.GetRequiredService(); - var uiTask = ui.DrawUiAsync(cts.Token); + uiTask = ui.DrawUiAsync(uiCts.Token); var outputDir = settings.OutputDir; outputDir.Create(); @@ -71,25 +71,17 @@ public override async Task ExecuteAsync(CommandContext context, Settings se return 0; } - var downloaderTask = downloaderService.StartDownloadAsync(settings.MatchUrl, settings.MatchType, settings.ParsedFilters, settings.LimitPages, cts.Token); - pageWorkerRunner.StartTasks(outputDir.FullName, settings.RateLimit, cts.Token); + var downloaderTask = downloaderService.StartDownloadAsync(settings.MatchUrl, settings.MatchType, settings.ParsedFilters, settings.LimitPages, workerCts.Token); + pageWorkerRunner.StartTasks(outputDir.FullName, settings.RateLimit, workerCts.Token); + pageWorkerRunnerTask = pageWorkerRunner.WaitForCompletionAsync(); await downloaderTask.ConfigureAwait(false); - pageWokerRunnerTask = pageWorkerRunner.WaitForCompletionAsync(); - await pageWokerRunnerTask.ConfigureAwait(false); + await pageWorkerRunnerTask.ConfigureAwait(false); - await cts.CancelAsync().ConfigureAwait(false); + await workerCts.CancelAsync().ConfigureAwait(false); + await uiCts.CancelAsync().ConfigureAwait(false); await uiTask.ConfigureAwait(false); await serviceProvider.DisposeAsync().ConfigureAwait(false); - - foreach (var item in loggingMessagesAccessor.DrainMessages()) - { - AnsiConsole.WriteLine(item.Message); - if (item.Exception is not null) - { - AnsiConsole.WriteException(item.Exception); - } - } return 0; } #pragma warning disable CA1031 // Do not catch general exception types @@ -116,6 +108,15 @@ public override async Task ExecuteAsync(CommandContext context, Settings se } else { + if (pageWorkerRunnerTask is not null) + { + await pageWorkerRunnerTask.ConfigureAwait(false); + } + await uiCts.CancelAsync().ConfigureAwait(false); + if (uiTask is not null) + { + await uiTask.ConfigureAwait(false); + } var disposeException = await DisposeProviderAsync(serviceProvider).ConfigureAwait(false); if (disposeException is not null) { diff --git a/WaybackDownloader/Logging/CollectedLogMessages.cs b/WaybackDownloader/Logging/CollectedLogMessages.cs index 6b67de2..eaf93ab 100644 --- a/WaybackDownloader/Logging/CollectedLogMessages.cs +++ b/WaybackDownloader/Logging/CollectedLogMessages.cs @@ -11,7 +11,8 @@ internal sealed class CollectedLogMessages public ImmutableArray DrainMessages() { //Cannot use LINQ here - see https://github.com/dotnet/runtime/issues/101641 - var array = ImmutableArray.CreateBuilder(_messages.Count); + //Do not use .Count - this locks the ConcurrentQueue and is very slow + var array = ImmutableArray.CreateBuilder(20); while (_messages.TryDequeue(out var m)) { array.Add(m); diff --git a/WaybackDownloader/MockDataHttpMessageHandler.cs b/WaybackDownloader/MockDataHttpMessageHandler.cs index c05d9d1..8d4f957 100644 --- a/WaybackDownloader/MockDataHttpMessageHandler.cs +++ b/WaybackDownloader/MockDataHttpMessageHandler.cs @@ -36,9 +36,9 @@ private static async Task CreateCdxResponseAsync(Cancellati return a; } - //private static int _year = 1001; - //private static int GetYear() => Interlocked.Increment(ref _year); - private static int GetYear() => 2000; + private static int _year = 1001; + private static int GetYear() => Interlocked.Increment(ref _year); + //private static int GetYear() => 2000; private static async Task CreateHtmlResponseAsync(CancellationToken cancellationToken) { diff --git a/WaybackDownloader/Properties/launchSettings.json b/WaybackDownloader/Properties/launchSettings.json index 71a49ad..61204dc 100644 --- a/WaybackDownloader/Properties/launchSettings.json +++ b/WaybackDownloader/Properties/launchSettings.json @@ -2,7 +2,7 @@ "profiles": { "WaybackDownloader": { "commandName": "Project", - "commandLineArgs": "archive.org \"./pages\" -m exact --limitPages 10000 -p SomeString1 -r 1 --historyLogDir=./logFileLocation --useMockHandler" + "commandLineArgs": "archive.org \"./pages\" -m exact --limitPages 10000 -p SomeString1 -r 10 --historyLogDir=./logFileLocation --useMockHandler --clearHistory" } } } \ No newline at end of file diff --git a/WaybackDownloader/Services/PageWorker.cs b/WaybackDownloader/Services/PageWorker.cs index 7ed3506..70ec636 100644 --- a/WaybackDownloader/Services/PageWorker.cs +++ b/WaybackDownloader/Services/PageWorker.cs @@ -42,35 +42,39 @@ public async Task StartAsync(string outputDir, CancellationToken cancellationTok { var shouldExit = false; cancellationToken.Register(() => shouldExit = true); - - while (!shouldExit && await _reader.WaitToReadAsync(default).ConfigureAwait(false)) + try { - while (!shouldExit && !cancellationToken.IsCancellationRequested && _reader.TryRead(out var record)) + while (!shouldExit && await _reader.WaitToReadAsync(default).ConfigureAwait(false)) { - if (!PathUtilities.TryGetNormalizedFilePath(record.Original, out var normalizedPath)) - { - logger.UrlCouldNotBeConverted(record.Original); - Counters.FilesSkipped.Increment(); - continue; - } - - logger.UrlTransformed(record.Original, normalizedPath); - - var writePath = Path.Combine(outputDir, normalizedPath); - var pageKey = new PageKey(record.UrlKey, normalizedPath); - var foundPage = pagesStore.TryGetDownloadedPageTimestamp(pageKey.Value, out var timestamp); - if (!foundPage || timestamp < record.Timestamp) + while (!shouldExit && !cancellationToken.IsCancellationRequested && _reader.TryRead(out var record)) { - await TryWritePageAsync(record, writePath, pageKey, foundPage, CancellationToken.None).ConfigureAwait(false); - } - else - { - logger.UrlAlreadyDownloaded(record.Original); - Counters.FilesSkipped.Increment(); + if (!PathUtilities.TryGetNormalizedFilePath(record.Original, out var normalizedPath)) + { + logger.UrlCouldNotBeConverted(record.Original); + Counters.FilesSkipped.Increment(); + continue; + } + + logger.UrlTransformed(record.Original, normalizedPath); + + var writePath = Path.Combine(outputDir, normalizedPath); + var pageKey = new PageKey(record.UrlKey, normalizedPath); + var foundPage = pagesStore.TryGetDownloadedPageTimestamp(pageKey.Value, out var timestamp); + if (!foundPage || timestamp < record.Timestamp) + { + await TryWritePageAsync(record, writePath, pageKey, foundPage, CancellationToken.None).ConfigureAwait(false); + } + else + { + logger.UrlAlreadyDownloaded(record.Original); + Counters.FilesSkipped.Increment(); + } } } } - + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + } logger.ExitingWorker(); } diff --git a/WaybackDownloader/Services/PageWorkerRunner.cs b/WaybackDownloader/Services/PageWorkerRunner.cs index f9482d9..e948f28 100644 --- a/WaybackDownloader/Services/PageWorkerRunner.cs +++ b/WaybackDownloader/Services/PageWorkerRunner.cs @@ -36,7 +36,7 @@ private Task StartAsync(string outputDir, CancellationToken cancellationToken) private int _numberOfEvaluationsAtRequiredSpeed; private async Task EvaluateLimitAsync(string outputDir, int requestedDownloadLimit, CancellationToken cancellationToken) { - const int SegmentDurationSeconds = 5; + const int SegmentDurationSeconds = 2; const float MinimumThreshold = 0.9f; await Task.Yield(); while (!cancellationToken.IsCancellationRequested)