From 36e2ff0eea78c9b1253c713202a0b22b89a1553b Mon Sep 17 00:00:00 2001 From: Sebastian Burckhardt Date: Fri, 15 Mar 2024 13:00:21 -0700 Subject: [PATCH] Fix hanging blob creation (#351) * fix hanging blob creation * remove unnecessary try-catch --- .../StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs | 2 +- .../StorageLayer/Faster/AzureBlobs/BlobEntry.cs | 9 +++++++-- .../StorageLayer/Faster/AzureBlobs/StorageOperations.cs | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs index 0dbe9c6c..f733ecb2 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs @@ -429,7 +429,7 @@ public override void WriteAsync(IntPtr sourceAddress, int segmentId, ulong desti // If no blob exists for the segment, we must first create the segment asynchronouly. (Create call takes ~70 ms by measurement) // After creation is done, we can call write. - _ = entry.CreateAsync(size, pageBlob); + _ = entry.CreateAsync(size, pageBlob, id); } // Otherwise, some other thread beat us to it. Okay to use their blobs. blobEntry = this.blobs[segmentId]; diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs index 54fd8542..964cf3f6 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs @@ -55,7 +55,7 @@ public BlobEntry(AzureStorageDevice azureStorageDevice) /// /// maximum size of the blob /// The page blob to create - public async Task CreateAsync(long size, BlobUtilsV12.PageBlobClients pageBlob) + public async Task CreateAsync(long size, BlobUtilsV12.PageBlobClients pageBlob, long id) { if (this.waitingCount != 0) { @@ -67,12 +67,17 @@ await this.azureStorageDevice.BlobManager.PerformWithRetriesAsync( true, "PageBlobClient.CreateAsync", "CreateDevice", - "", + $"id={id}", pageBlob.Default.Name, 3000, true, async (numAttempts) => { + if (this.ETag != default) + { + return 1; // blob was already created by previous attempt + } + var client = (numAttempts > 1) ? pageBlob.Default : pageBlob.Aggressive; var response = await client.CreateAsync( diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs index a07acbb1..b160817c 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs @@ -101,7 +101,7 @@ public async Task PerformWithRetriesAsync( } continue; } - catch (Azure.RequestFailedException ex) when (BlobUtilsV12.PreconditionFailed(ex) && readETagAsync != null) + catch (Azure.RequestFailedException ex) when (BlobUtilsV12.PreconditionFailed(ex) && readETagAsync != null && numAttempts < BlobManager.MaxRetries) { this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) failed precondition on attempt {numAttempts}; target={target} latencyMs={stopwatch.Elapsed.TotalMilliseconds:F1} {details}"); mustReadETagFirst = true;