Skip to content

Commit

Permalink
Implement lease extension
Browse files Browse the repository at this point in the history
Add flag `--experimental_remote_cache_lease_extension`, which when set, Bazel will create a background thread periodically sending `FindMissingBlobs` requests to CAS during the build.

1. All the outputs that were not downloaded are within the scope of lease extension. The outputs are acquired from skyframe by traversing the action graph.
2. Lease extension starts after any action was built and ends after execution phase ended. The frequency is related to `--experimental_remote_cache_ttl`.
3. Lease extensions are performed on action basis, not by collecting all outputs and issue one giant `FindMissingBlobs`.
    - Collecting all outputs might increase memory watermark and cause OOM.
    - Sending one `FindMissingBlobs` request per action may increase the overhead of network roundtrip, but the cost should be saturated given that the lease extension happens at background and is not wall time critical.
4. For an incremental build, the same applies: lease extension starts after any action was executed.
    - We don't want lease extension blocking action execution, nor affecting build performance.
    - Since we have TTL based cache discarding, any expired blobs will be discarded.
    - Leases of blobs that are not downloaded, still used by this build (because they are referenced by skyframe) will be extended as normal.

Part of #16660.

Closes #17944.

PiperOrigin-RevId: 544032753
Change-Id: Iafe8b96c48abbb2e67302cd7a2f06f97ab43f825
  • Loading branch information
coeuvre authored and copybara-github committed Jun 28, 2023
1 parent 15c412e commit 1a2b792
Show file tree
Hide file tree
Showing 11 changed files with 449 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,26 @@
package com.google.devtools.build.lib.actions;

import com.google.devtools.build.lib.actions.cache.ActionCache;
import java.util.AbstractMap.SimpleEntry;
import java.util.Map.Entry;
import javax.annotation.Nullable;

/** Utility functions for {@link ActionCache}. */
public class ActionCacheUtils {
private ActionCacheUtils() {}

@Nullable
public static Entry<String, ActionCache.Entry> getCacheEntryWithKey(
ActionCache actionCache, Action action) {
for (Artifact output : action.getOutputs()) {
ActionCache.Entry entry = actionCache.get(output.getExecPathString());
if (entry != null) {
return new SimpleEntry<>(output.getExecPathString(), entry);
}
}
return null;
}

/** Checks whether one of existing output paths is already used as a key. */
@Nullable
public static ActionCache.Entry getCacheEntry(ActionCache actionCache, Action action) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,12 @@ public long getExpireAtEpochMilli() {
return -1;
}

/**
* Extends the expiration time for this metadata. If it was constructed without known expiration
* time (i.e. expireAtEpochMilli < 0), this extension does nothing.
*/
public void extendExpireAtEpochMilli(long expireAtEpochMilli) {}

public boolean isAlive(Instant now) {
return true;
}
Expand Down Expand Up @@ -655,7 +661,7 @@ public final String toString() {

/** A remote artifact that expires at a particular time. */
private static final class RemoteFileArtifactValueWithExpiration extends RemoteFileArtifactValue {
private final long expireAtEpochMilli;
private long expireAtEpochMilli;

private RemoteFileArtifactValueWithExpiration(
byte[] digest,
Expand All @@ -672,6 +678,12 @@ public long getExpireAtEpochMilli() {
return expireAtEpochMilli;
}

@Override
public void extendExpireAtEpochMilli(long expireAtEpochMilli) {
Preconditions.checkState(expireAtEpochMilli > this.expireAtEpochMilli);
this.expireAtEpochMilli = expireAtEpochMilli;
}

@Override
public boolean isAlive(Instant now) {
return now.toEpochMilli() < expireAtEpochMilli;
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/com/google/devtools/build/lib/remote/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib:runtime/command_line_path_factory",
"//src/main/java/com/google/devtools/build/lib/actions",
"//src/main/java/com/google/devtools/build/lib/actions:action_input_helper",
"//src/main/java/com/google/devtools/build/lib/actions:action_lookup_data",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:execution_requirements",
"//src/main/java/com/google/devtools/build/lib/actions:file_metadata",
Expand Down Expand Up @@ -99,7 +100,9 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/remote/options",
"//src/main/java/com/google/devtools/build/lib/remote/util",
"//src/main/java/com/google/devtools/build/lib/remote/zstd",
"//src/main/java/com/google/devtools/build/lib/skyframe:action_execution_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:mutable_supplier",
"//src/main/java/com/google/devtools/build/lib/skyframe:sky_functions",
"//src/main/java/com/google/devtools/build/lib/skyframe:tree_artifact_value",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/util:abrupt_exit_exception",
Expand All @@ -116,6 +119,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/worker:worker_options",
"//src/main/java/com/google/devtools/build/lib/worker:worker_spawn_runner",
"//src/main/java/com/google/devtools/build/skyframe",
"//src/main/java/com/google/devtools/build/skyframe:skyframe-objects",
"//src/main/java/com/google/devtools/common/options",
"//src/main/protobuf:failure_details_java_proto",
"//src/main/protobuf:spawn_java_proto",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,60 @@
import com.google.devtools.build.lib.skyframe.TreeArtifactValue;
import com.google.devtools.build.skyframe.MemoizingEvaluator;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.annotation.Nullable;

/** A lease service that manages the lease of remote blobs. */
public class LeaseService {
private final MemoizingEvaluator memoizingEvaluator;
@Nullable private final ActionCache actionCache;
private final AtomicBoolean leaseExtensionStarted = new AtomicBoolean(false);
@Nullable LeaseExtension leaseExtension;

public LeaseService(MemoizingEvaluator memoizingEvaluator, @Nullable ActionCache actionCache) {
public LeaseService(
MemoizingEvaluator memoizingEvaluator,
@Nullable ActionCache actionCache,
@Nullable LeaseExtension leaseExtension) {
this.memoizingEvaluator = memoizingEvaluator;
this.actionCache = actionCache;
this.leaseExtension = leaseExtension;
}

/** Clean up internal state when files are evicted from remote CAS. */
public void handleMissingInputs(Set<ActionInput> missingActionInputs) {
if (missingActionInputs.isEmpty()) {
return;
public void finalizeAction() {
if (leaseExtensionStarted.compareAndSet(false, true)) {
if (leaseExtension != null) {
leaseExtension.start();
}
}
}

public void finalizeExecution(Set<ActionInput> missingActionInputs) {
if (leaseExtension != null) {
leaseExtension.stop();
}

if (!missingActionInputs.isEmpty()) {
handleMissingInputs();
}
}

/**
* An interface whose implementations extend the leases of remote outputs referenced by skyframe.
*/
public interface LeaseExtension {
void start();

void stop();
}

/** Clean up internal state when files are evicted from remote CAS. */
private void handleMissingInputs() {
// If any outputs are evicted, remove all remote metadata from skyframe and local action cache.
//
// With TTL based discarding and lease extension, remote cache eviction error won't happen if
// remote cache can guarantee the TTL. However, if it happens, it usually means the remote cache
// is under high load and it could possibly evict more blobs that Bazel wouldn't aware of.
// Following builds could still fail for the same error (caused by different blobs).

memoizingEvaluator.delete(
key -> {
if (key.functionName().equals(SkyFunctions.ACTION_EXECUTION)) {
Expand Down
Loading

0 comments on commit 1a2b792

Please sign in to comment.