Skip to content

Commit fadd992

Browse files
authored
fix(bigquery): Remove ReadAPI bypass in executeSelect() (#3624)
* fix(bigquery): Remove ReadAPI bypass in executeSelect() for fast query requests.
* Enable fast query and read API. Move readAPI condition check from getExecuteSelectResponse() to queryRpc(). This allows fast query to be used with ReadAPI.
* Check for null for fast query results.getTotalRows().
* Remove test file.
* Add internal query state to keep track of ReadAPI usage. A Boolean field is added to keep track of whether or not the high throughput ReadAPI is used. This is mostly for testing, to avoid another regression in the future.
* Move tests into IT test file.
* Fix formatting changes. Again :/
* Add VisibleForTesting annotation.
1 parent 09c98bf commit fadd992

File tree

3 files changed

+89
-8
lines changed

3 files changed

+89
-8
lines changed

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ConnectionImpl.java

+16-8
Original file line numberDiff line numberDiff line change
@@ -476,22 +476,29 @@ private BigQueryResult queryRpc(
476476
}
477477

478478
// Query finished running and we can paginate all the results
479-
if (results.getJobComplete() && results.getSchema() != null) {
479+
// Results should be read using the high throughput read API if sufficiently large.
480+
boolean resultsLargeEnoughForReadApi =
481+
connectionSettings.getUseReadAPI()
482+
&& results.getTotalRows() != null
483+
&& results.getTotalRows().longValue() > connectionSettings.getMinResultSize();
484+
if (results.getJobComplete() && results.getSchema() != null && !resultsLargeEnoughForReadApi) {
480485
return processQueryResponseResults(results);
481486
} else {
482-
// Query is long-running (> 10s) and hasn't completed yet, or query completed but didn't
483-
// return the schema, fallback to jobs.insert path. Some operations don't return the schema
484-
// and can be optimized here, but this is left as future work.
485-
Long totalRows = results.getTotalRows() == null ? null : results.getTotalRows().longValue();
486-
Long pageRows = results.getRows() == null ? null : (long) (results.getRows().size());
487+
// Query is long-running (> 10s) and hasn't completed yet, query completed but didn't
488+
// return the schema, or results are sufficiently large to use the high throughput read API,
489+
// fallback to jobs.insert path. Some operations don't return the schema and can be optimized
490+
// here, but this is left as future work.
491+
JobId jobId = JobId.fromPb(results.getJobReference());
492+
GetQueryResultsResponse firstPage = getQueryResultsFirstPage(jobId);
493+
Long totalRows =
494+
firstPage.getTotalRows() == null ? null : firstPage.getTotalRows().longValue();
495+
Long pageRows = firstPage.getRows() == null ? null : (long) (firstPage.getRows().size());
487496
logger.log(
488497
Level.WARNING,
489498
"\n"
490499
+ String.format(
491500
"results.getJobComplete(): %s, isSchemaNull: %s , totalRows: %s, pageRows: %s",
492501
results.getJobComplete(), results.getSchema() == null, totalRows, pageRows));
493-
JobId jobId = JobId.fromPb(results.getJobReference());
494-
GetQueryResultsResponse firstPage = getQueryResultsFirstPage(jobId);
495502
return getSubsequentQueryResultsWithJob(
496503
totalRows, pageRows, jobId, firstPage, hasQueryParameters);
497504
}
@@ -996,6 +1003,7 @@ BigQueryResult highThroughPutRead(
9961003
schema);
9971004

9981005
logger.log(Level.INFO, "\n Using BigQuery Read API");
1006+
stats.getQueryStatistics().setUseReadApi(true);
9991007
return new BigQueryResultImpl<BigQueryResultImpl.Row>(schema, totalRows, bufferRow, stats);
10001008

10011009
} catch (IOException e) {

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/JobStatistics.java

+15
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import com.google.auto.value.AutoValue;
2828
import com.google.cloud.StringEnumType;
2929
import com.google.cloud.StringEnumValue;
30+
import com.google.common.annotations.VisibleForTesting;
3031
import com.google.common.base.Function;
3132
import com.google.common.base.MoreObjects;
3233
import com.google.common.base.MoreObjects.ToStringHelper;
@@ -396,6 +397,7 @@ public static class QueryStatistics extends JobStatistics {
396397
private final BiEngineStats biEngineStats;
397398
private final Integer billingTier;
398399
private final Boolean cacheHit;
400+
private Boolean useReadApi;
399401
private final String ddlOperationPerformed;
400402
private final TableId ddlTargetTable;
401403
private final RoutineId ddlTargetRoutine;
@@ -796,6 +798,7 @@ private QueryStatistics(Builder builder) {
796798
this.biEngineStats = builder.biEngineStats;
797799
this.billingTier = builder.billingTier;
798800
this.cacheHit = builder.cacheHit;
801+
this.useReadApi = false;
799802
this.ddlOperationPerformed = builder.ddlOperationPerformed;
800803
this.ddlTargetTable = builder.ddlTargetTable;
801804
this.ddlTargetRoutine = builder.ddlTargetRoutine;
@@ -835,6 +838,18 @@ public Boolean getCacheHit() {
835838
return cacheHit;
836839
}
837840

841+
/** Returns whether the query result is read from the high throughput ReadAPI. */
842+
@VisibleForTesting
843+
public Boolean getUseReadApi() {
844+
return useReadApi;
845+
}
846+
847+
/** Sets internal state to reflect the use of the high throughput ReadAPI. */
848+
@VisibleForTesting
849+
public void setUseReadApi(Boolean useReadApi) {
850+
this.useReadApi = useReadApi;
851+
}
852+
838853
/** [BETA] For DDL queries, returns the operation applied to the DDL target table. */
839854
public String getDdlOperationPerformed() {
840855
return ddlOperationPerformed;

google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java

+58
Original file line numberDiff line numberDiff line change
@@ -3489,6 +3489,63 @@ public void testExecuteSelectDefaultConnectionSettings() throws SQLException {
34893489
String query = "SELECT corpus FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus;";
34903490
BigQueryResult bigQueryResult = connection.executeSelect(query);
34913491
assertEquals(42, bigQueryResult.getTotalRows());
3492+
assertFalse(bigQueryResult.getBigQueryResultStats().getQueryStatistics().getUseReadApi());
3493+
}
3494+
3495+
@Test
3496+
public void testExecuteSelectWithReadApi() throws SQLException {
3497+
final int rowLimit = 5000;
3498+
final String QUERY =
3499+
"SELECT * FROM bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2017 LIMIT %s";
3500+
// Job timeout is somewhat arbitrary - just ensures that fast query is not used.
3501+
// min result size and page row count ratio ensure that the ReadAPI is used.
3502+
ConnectionSettings connectionSettingsReadAPIEnabledFastQueryDisabled =
3503+
ConnectionSettings.newBuilder()
3504+
.setUseReadAPI(true)
3505+
.setJobTimeoutMs(Long.MAX_VALUE)
3506+
.setMinResultSize(500)
3507+
.setTotalToPageRowCountRatio(1)
3508+
.build();
3509+
3510+
Connection connectionReadAPIEnabled =
3511+
bigquery.createConnection(connectionSettingsReadAPIEnabledFastQueryDisabled);
3512+
3513+
String selectQuery = String.format(QUERY, rowLimit);
3514+
3515+
BigQueryResult bigQueryResultSet = connectionReadAPIEnabled.executeSelect(selectQuery);
3516+
ResultSet rs = bigQueryResultSet.getResultSet();
3517+
// Paginate results to avoid an InterruptedException
3518+
while (rs.next()) {}
3519+
3520+
assertTrue(bigQueryResultSet.getBigQueryResultStats().getQueryStatistics().getUseReadApi());
3521+
connectionReadAPIEnabled.close();
3522+
}
3523+
3524+
@Test
3525+
public void testExecuteSelectWithFastQueryReadApi() throws SQLException {
3526+
final int rowLimit = 5000;
3527+
final String QUERY =
3528+
"SELECT * FROM bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2017 LIMIT %s";
3529+
// min result size and page row count ratio ensure that the ReadAPI is used.
3530+
ConnectionSettings connectionSettingsReadAPIEnabledFastQueryDisabled =
3531+
ConnectionSettings.newBuilder()
3532+
.setUseReadAPI(true)
3533+
.setMinResultSize(500)
3534+
.setTotalToPageRowCountRatio(1)
3535+
.build();
3536+
3537+
Connection connectionReadAPIEnabled =
3538+
bigquery.createConnection(connectionSettingsReadAPIEnabledFastQueryDisabled);
3539+
3540+
String selectQuery = String.format(QUERY, rowLimit);
3541+
3542+
BigQueryResult bigQueryResultSet = connectionReadAPIEnabled.executeSelect(selectQuery);
3543+
ResultSet rs = bigQueryResultSet.getResultSet();
3544+
// Paginate results to avoid an InterruptedException
3545+
while (rs.next()) {}
3546+
3547+
assertTrue(bigQueryResultSet.getBigQueryResultStats().getQueryStatistics().getUseReadApi());
3548+
connectionReadAPIEnabled.close();
34923549
}
34933550

34943551
@Test
@@ -3540,6 +3597,7 @@ public void testExecuteSelectWithCredentials() throws SQLException {
35403597
+ TABLE_ID_LARGE.getTable(); // Large query result is needed to use BigQueryReadClient.
35413598
BigQueryResult bigQueryResult = connectionGoodCredentials.executeSelect(query);
35423599
assertEquals(313348, bigQueryResult.getTotalRows());
3600+
assertTrue(bigQueryResult.getBigQueryResultStats().getQueryStatistics().getUseReadApi());
35433601

35443602
// Scenario 2.
35453603
// Create a new bigQuery object but explicitly an invalid credential.

0 commit comments

Comments (0)