Skip to content

Commit 9a3d2ed

Browse files
olavloite authored and surbhigarg92 committed
fix: retry specific internal errors (googleapis#3565)
* chore: make internal auth backend errors retryable

  Spanner occasionally returns INTERNAL errors regarding the auth backend server. These errors should be regarded as retryable.

* fix: retry specific internal errors

  Some specific internal errors should be retried. Instead of adding INTERNAL as a standard retryable error code, we use an interceptor to catch and translate those specific errors. See also b/375684610

* chore: address review comments
* fix: wait for session pool to initialize
* fix: register errors before creating the client
1 parent 03c383d commit 9a3d2ed

File tree

4 files changed

+133
-18
lines changed

4 files changed

+133
-18
lines changed

google-cloud-spanner/src/main/java/com/google/cloud/spanner/IsRetryableInternalError.java

+20-18
Original file line numberDiff line numberDiff line change
@@ -18,33 +18,35 @@
1818

1919
import com.google.api.gax.rpc.InternalException;
2020
import com.google.common.base.Predicate;
21+
import com.google.common.collect.ImmutableList;
2122
import io.grpc.Status;
23+
import io.grpc.Status.Code;
2224
import io.grpc.StatusRuntimeException;
2325

2426
public class IsRetryableInternalError implements Predicate<Throwable> {
27+
public static final IsRetryableInternalError INSTANCE = new IsRetryableInternalError();
2528

26-
private static final String HTTP2_ERROR_MESSAGE = "HTTP/2 error code: INTERNAL_ERROR";
27-
private static final String CONNECTION_CLOSED_ERROR_MESSAGE =
28-
"Connection closed with unknown cause";
29-
private static final String EOS_ERROR_MESSAGE =
30-
"Received unexpected EOS on DATA frame from server";
29+
private static final ImmutableList<String> RETRYABLE_ERROR_MESSAGES =
30+
ImmutableList.of(
31+
"HTTP/2 error code: INTERNAL_ERROR",
32+
"Connection closed with unknown cause",
33+
"Received unexpected EOS on DATA frame from server",
34+
"stream terminated by RST_STREAM",
35+
"Authentication backend internal server error. Please retry.");
3136

32-
private static final String RST_STREAM_ERROR_MESSAGE = "stream terminated by RST_STREAM";
37+
public boolean isRetryableInternalError(Status status) {
38+
return status.getCode() == Code.INTERNAL
39+
&& status.getDescription() != null
40+
&& isRetryableErrorMessage(status.getDescription());
41+
}
3342

3443
@Override
3544
public boolean apply(Throwable cause) {
36-
if (isInternalError(cause)) {
37-
if (cause.getMessage().contains(HTTP2_ERROR_MESSAGE)) {
38-
return true;
39-
} else if (cause.getMessage().contains(CONNECTION_CLOSED_ERROR_MESSAGE)) {
40-
return true;
41-
} else if (cause.getMessage().contains(EOS_ERROR_MESSAGE)) {
42-
return true;
43-
} else if (cause.getMessage().contains(RST_STREAM_ERROR_MESSAGE)) {
44-
return true;
45-
}
46-
}
47-
return false;
45+
return isInternalError(cause) && isRetryableErrorMessage(cause.getMessage());
46+
}
47+
48+
private boolean isRetryableErrorMessage(String errorMessage) {
49+
return RETRYABLE_ERROR_MESSAGES.stream().anyMatch(errorMessage::contains);
4850
}
4951

5052
private boolean isInternalError(Throwable cause) {

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/SpannerErrorInterceptor.java

+7
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package com.google.cloud.spanner.spi.v1;
1818

19+
import com.google.cloud.spanner.IsRetryableInternalError;
1920
import com.google.rpc.BadRequest;
2021
import com.google.rpc.Help;
2122
import com.google.rpc.LocalizedMessage;
@@ -32,6 +33,7 @@
3233
import io.grpc.Metadata;
3334
import io.grpc.MethodDescriptor;
3435
import io.grpc.Status;
36+
import io.grpc.Status.Code;
3537
import io.grpc.protobuf.ProtoUtils;
3638
import java.util.logging.Level;
3739
import java.util.logging.Logger;
@@ -69,6 +71,11 @@ public void start(Listener<RespT> responseListener, Metadata headers) {
6971
@Override
7072
public void onClose(Status status, Metadata trailers) {
7173
try {
74+
// Translate INTERNAL errors that should be retried to a retryable error code.
75+
if (IsRetryableInternalError.INSTANCE.isRetryableInternalError(status)) {
76+
status =
77+
Status.fromCode(Code.UNAVAILABLE).withDescription(status.getDescription());
78+
}
7279
if (trailers.containsKey(LOCALIZED_MESSAGE_KEY)) {
7380
status =
7481
Status.fromCodeValue(status.getCode().value())

google-cloud-spanner/src/test/java/com/google/cloud/spanner/IsRetryableInternalErrorTest.java

+11
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,17 @@ public void rstStreamInternalExceptionIsRetryable() {
127127
assertTrue(predicate.apply(e));
128128
}
129129

130+
@Test
131+
public void testAuthenticationBackendInternalServerErrorIsRetryable() {
132+
final StatusRuntimeException exception =
133+
new StatusRuntimeException(
134+
Status.fromCode(Code.INTERNAL)
135+
.withDescription(
136+
"INTERNAL: Authentication backend internal server error. Please retry."));
137+
138+
assertTrue(predicate.apply(exception));
139+
}
140+
130141
@Test
131142
public void genericInternalExceptionIsNotRetryable() {
132143
final InternalException e =
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.spanner;
18+
19+
import static org.junit.Assert.assertEquals;
20+
import static org.junit.Assert.assertFalse;
21+
import static org.junit.Assert.assertTrue;
22+
23+
import com.google.cloud.NoCredentials;
24+
import com.google.cloud.spanner.MockSpannerServiceImpl.SimulatedExecutionTime;
25+
import com.google.cloud.spanner.connection.AbstractMockServerTest;
26+
import com.google.spanner.v1.BatchCreateSessionsRequest;
27+
import com.google.spanner.v1.ExecuteSqlRequest;
28+
import io.grpc.ManagedChannelBuilder;
29+
import io.grpc.Status;
30+
import org.junit.Test;
31+
import org.junit.runner.RunWith;
32+
import org.junit.runners.JUnit4;
33+
import org.threeten.bp.Duration;
34+
35+
@RunWith(JUnit4.class)
36+
public class RetryableInternalErrorTest extends AbstractMockServerTest {
37+
@Test
38+
public void testTranslateInternalException() {
39+
mockSpanner.setBatchCreateSessionsExecutionTime(
40+
SimulatedExecutionTime.ofException(
41+
Status.INTERNAL
42+
.withDescription("Authentication backend internal server error. Please retry.")
43+
.asRuntimeException()));
44+
mockSpanner.setExecuteStreamingSqlExecutionTime(
45+
SimulatedExecutionTime.ofException(
46+
Status.INTERNAL
47+
.withDescription("Authentication backend internal server error. Please retry.")
48+
.asRuntimeException()));
49+
50+
try (Spanner spanner =
51+
SpannerOptions.newBuilder()
52+
.setProjectId("my-project")
53+
.setHost(String.format("http://localhost:%d", getPort()))
54+
.setChannelConfigurator(ManagedChannelBuilder::usePlaintext)
55+
.setCredentials(NoCredentials.getInstance())
56+
.setSessionPoolOption(
57+
SessionPoolOptions.newBuilder()
58+
.setMinSessions(1)
59+
.setMaxSessions(1)
60+
.setWaitForMinSessions(Duration.ofSeconds(5))
61+
.build())
62+
.build()
63+
.getService()) {
64+
65+
DatabaseClient client = spanner.getDatabaseClient(DatabaseId.of("p", "i", "d"));
66+
// Execute a query. This will block until a BatchCreateSessions call has finished and then
67+
// invoke ExecuteStreamingSql. Both of these RPCs should be retried.
68+
try (ResultSet resultSet = client.singleUse().executeQuery(SELECT1_STATEMENT)) {
69+
assertTrue(resultSet.next());
70+
assertFalse(resultSet.next());
71+
}
72+
// Verify that both the BatchCreateSessions call and the ExecuteStreamingSql call were
73+
// retried.
74+
assertEquals(2, mockSpanner.countRequestsOfType(BatchCreateSessionsRequest.class));
75+
assertEquals(2, mockSpanner.countRequestsOfType(ExecuteSqlRequest.class));
76+
// Clear the requests before the next test.
77+
mockSpanner.clearRequests();
78+
79+
// Execute a DML statement. This uses the ExecuteSql RPC.
80+
assertEquals(0, mockSpanner.countRequestsOfType(ExecuteSqlRequest.class));
81+
mockSpanner.setExecuteSqlExecutionTime(
82+
SimulatedExecutionTime.ofException(
83+
Status.INTERNAL
84+
.withDescription("Authentication backend internal server error. Please retry.")
85+
.asRuntimeException()));
86+
assertEquals(
87+
Long.valueOf(1L),
88+
client
89+
.readWriteTransaction()
90+
.run(transaction -> transaction.executeUpdate(INSERT_STATEMENT)));
91+
// Verify that also this request was retried.
92+
assertEquals(2, mockSpanner.countRequestsOfType(ExecuteSqlRequest.class));
93+
}
94+
}
95+
}

0 commit comments

Comments
 (0)