/hbase-export' as {@code exportedSnapshotDir}
+ */
+ public HBaseSnapshotInputConfigBuilder setSnapshotName(String snapshotName) {
+ this.snapshotName = snapshotName;
+ return this;
+ }
+
+ /**
+ * Sets the unique suffix appended to the restore directory to avoid conflicts between jobs;
+ * this is typically the Dataflow job name.
+ */
+ public HBaseSnapshotInputConfigBuilder setRestoreDirSuffix(String suffix) {
+ this.restoreDirSuffix = suffix;
+ return this;
+ }
+
+ public String getRestoreDir() {
+ return RESTORE_DIR + this.restoreDirSuffix;
+ }
+
+ public Configuration build() throws Exception {
+ Preconditions.checkNotNull(projectId, "Required value projectId must be set");
+ Preconditions.checkNotNull(
+ hbaseSnapshotSourceDir, "Required value hbaseSnapshotSourceDir must be set");
+ Preconditions.checkNotNull(snapshotName, "Required value snapshotName must be set");
+ Preconditions.checkState(
+ hbaseSnapshotSourceDir.startsWith(GcsPath.SCHEME),
+ "snapshot folder must be hosted in a GCS bucket ");
+
+ Configuration conf = createHBaseConfiguration();
+
+ // Configure a MapReduce job based on the HBaseConfiguration
+ // and return the job's Configuration.
+ ClientProtos.Scan proto = ProtobufUtil.toScan(new Scan().setBatch(BATCH_SIZE));
+ conf.set(TableInputFormat.SCAN, Base64.encodeBytes(proto.toByteArray()));
+ Job job = Job.getInstance(conf); // creates internal clone of hbaseConf
+ // The restore folder must be under the current bucket root so that it is treated as part of
+ // the same filesystem as hbaseSnapshotSourceDir.
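+ // For example (illustrative values only, assuming RESTORE_DIR is "/.restore-"): with
+ // hbaseSnapshotSourceDir = gs://my-bucket/hbase-export and suffix "my-job", the restore
+ // directory resolves to gs://my-bucket/.restore-my-job at the bucket root.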
+ TableSnapshotInputFormat.setInput(job, snapshotName, new Path(getRestoreDir()));
+ return job.getConfiguration(); // extract the modified clone
+ }
+
+ // Split out from build() so the base configuration can be verified directly in unit tests.
+ public Configuration createHBaseConfiguration() {
+ Configuration conf = HBaseConfiguration.create();
+
+ // Set up the input data location for the HBase snapshot import.
+ // exportedSnapshotDir should be a GCS bucket path.
+ conf.set("hbase.rootdir", hbaseSnapshotSourceDir);
+ conf.set("fs.defaultFS", hbaseSnapshotSourceDir);
+
+ // Set up the GCS connector so GCS can be used as the Hadoop filesystem.
+ conf.set("fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS");
+ conf.set("fs.gs.project.id", projectId);
+ conf.setBoolean("google.cloud.auth.service.account.enable", true);
+
+ // Set up the MapReduce config for TableSnapshotInputFormat.
+ conf.setClass(
+ "mapreduce.job.inputformat.class", TableSnapshotInputFormat.class, InputFormat.class);
+ conf.setClass("key.class", ImmutableBytesWritable.class, Writable.class);
+ conf.setClass("value.class", Result.class, Object.class);
+ return conf;
+ }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java
new file mode 100644
index 0000000000..2d8ce7c31f
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly;
+import com.google.cloud.bigtable.beam.CloudBigtableIO;
+import com.google.cloud.bigtable.beam.TemplateUtils;
+import com.google.cloud.bigtable.beam.sequencefiles.HBaseResultToMutationFn;
+import com.google.cloud.bigtable.beam.sequencefiles.ImportJob;
+import com.google.cloud.bigtable.beam.sequencefiles.Utils;
+import com.google.common.annotations.VisibleForTesting;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.Wait;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * A job that imports data from HBase snapshot exports hosted in a Cloud Storage bucket into
+ * Cloud Bigtable.
+ *
+ * <p>Example: if you have exported your HBase snapshot to the GCS bucket
+ * gs://$HBASE_EXPORT_ROOT_PATH and want to import the snapshot
+ * gs://$HBASE_EXPORT_ROOT_PATH/.hbase-snapshot/$SNAPSHOT_NAME into Cloud Bigtable table $TABLE in
+ * instance $INSTANCE, run the job directly with the following command:
+ *
+ * <pre>
+ * mvn compile exec:java \
+ * -DmainClass=com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot \
+ * -Dexec.args="--runner=DataflowRunner \
+ * --stagingLocation=gs://$STAGING_PATH \
+ * --project=$PROJECT \
+ * --bigtableInstanceId=$INSTANCE \
+ * --bigtableTableId=$TABLE \
+ * --hbaseSnapshotSourceDir=gs://$HBASE_EXPORT_ROOT_PATH \
+ * --snapshotName=$SNAPSHOT_NAME"
+ * </pre>
+ *
+ * <p>Note that if the job fails, the temporary files generated under the .restore-$JOB_NAME
+ * directory in the snapshot export bucket are not deleted. You must either relaunch the job with
+ * the same jobName or delete this directory manually, for example:
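+ *
+ * <pre>
+ * # Illustrative only -- assumes the restore directory was created at the export bucket root:
+ * gsutil -m rm -r gs://$HBASE_EXPORT_BUCKET/.restore-$JOB_NAME
+ * </pre>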
+ */
+@InternalExtensionOnly
+public class ImportJobFromHbaseSnapshot {
+ private static final Log LOG = LogFactory.getLog(ImportJobFromHbaseSnapshot.class);
+
+ public interface ImportOptions extends ImportJob.ImportOptions {
+ @Description("The HBase root dir where HBase snapshot files resides.")
+ String getHbaseSnapshotSourceDir();
+
+ @SuppressWarnings("unused")
+ void setHbaseSnapshotSourceDir(String hbaseSnapshotSourceDir);
+
+ @Description("Snapshot name")
+ String getSnapshotName();
+
+ @SuppressWarnings("unused")
+ void setSnapshotName(String snapshotName);
+ }
+
+ public static void main(String[] args) throws Exception {
+ PipelineOptionsFactory.register(ImportOptions.class);
+
+ ImportOptions opts =
+ PipelineOptionsFactory.fromArgs(args).withValidation().as(ImportOptions.class);
+
+ LOG.info("Building Pipeline");
+ Pipeline pipeline = buildPipeline(opts);
+ LOG.info("Running Pipeline");
+ PipelineResult result = pipeline.run();
+
+ if (opts.getWait()) {
+ Utils.waitForPipelineToFinish(result);
+ }
+ }
+
+ @VisibleForTesting
+ static Pipeline buildPipeline(ImportOptions opts) throws Exception {
+
+ Pipeline pipeline = Pipeline.create(Utils.tweakOptions(opts));
+ HBaseSnapshotInputConfigBuilder configurationBuilder =
+ new HBaseSnapshotInputConfigBuilder()
+ .setProjectId(opts.getProject())
+ .setHbaseSnapshotSourceDir(opts.getHbaseSnapshotSourceDir())
+ .setSnapshotName(opts.getSnapshotName())
+ .setRestoreDirSuffix(opts.getJobName());
+ PCollection<KV<ImmutableBytesWritable, Result>> readResult =
+ pipeline.apply(
+ "Read from HBase Snapshot",
+ HadoopFormatIO.<ImmutableBytesWritable, Result>read()
+ .withConfiguration(configurationBuilder.build()));
+
+ readResult
+ .apply("Create Mutations", ParDo.of(new HBaseResultToMutationFn()))
+ .apply(
+ "Write to Bigtable",
+ CloudBigtableIO.writeToTable(TemplateUtils.BuildImportConfig(opts)));
+
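+ // Once the snapshot read completes (enforced by Wait.on below), delete the temporary restore
+ // files that were created under the snapshot export bucket.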
+ final List<KV<String, String>> sourceAndRestoreFolders =
+ Arrays.asList(
+ KV.of(opts.getHbaseSnapshotSourceDir(), configurationBuilder.getRestoreDir()));
+ pipeline
+ .apply(Create.of(sourceAndRestoreFolders))
+ .apply(Wait.on(readResult))
+ .apply(ParDo.of(new CleanupHBaseSnapshotRestoreFilesFn()));
+
+ return pipeline;
+ }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java
index b4b3862817..4c794ed7eb 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java
@@ -57,7 +57,7 @@
* intended to be a preparation step before running an {@link ImportJob}.
*/
@InternalApi
-class CreateTableHelper {
+public class CreateTableHelper {
private static final Log LOG = LogFactory.getLog(CreateTableHelper.class);
@InternalApi
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java
index 6b2e628a5d..45954c7762 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java
@@ -15,6 +15,7 @@
*/
package com.google.cloud.bigtable.beam.sequencefiles;
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
@@ -43,7 +44,8 @@
* A {@link DoFn} function that converts a {@link Result} in the pipeline input to a {@link
* Mutation} for output.
*/
-class HBaseResultToMutationFn extends DoFn<KV<ImmutableBytesWritable, Result>, Mutation> {
+@InternalApi
+public class HBaseResultToMutationFn extends DoFn<KV<ImmutableBytesWritable, Result>, Mutation> {
private static Logger logger = LoggerFactory.getLogger(HBaseResultToMutationFn.class);
private static final long serialVersionUID = 1L;
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java
index 142c4a17ef..27e8f3debf 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java
@@ -15,6 +15,7 @@
*/
package com.google.cloud.bigtable.beam.sequencefiles;
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.PipelineResult;
@@ -28,7 +29,8 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-class Utils {
+@InternalApi
+public class Utils {
private static final Log LOG = LogFactory.getLog(Utils.class);
/**
@@ -74,7 +76,7 @@ public ResourceId apply(String input) {
*
* @param result
*/
- static void waitForPipelineToFinish(PipelineResult result) {
+ public static void waitForPipelineToFinish(PipelineResult result) {
try {
// Check to see if we are creating a template.
// This should throw {@link UnsupportedOperationException} when creating a template.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc
new file mode 100644
index 0000000000..8fe4533a01
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc
new file mode 100644
index 0000000000..1467a17f1f
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
new file mode 100644
index 0000000000..83e482aac0
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
@@ -0,0 +1,2 @@
+
+
test-snapshottestÙ÷¤×ä. (
\ No newline at end of file
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest
new file mode 100644
index 0000000000..180516dc03
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc
new file mode 100644
index 0000000000..ea5b25e778
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb
new file mode 100644
index 0000000000..dc8da56c10
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc
new file mode 100644
index 0000000000..51cacdd03b
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7
new file mode 100644
index 0000000000..cbd9f539b3
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc
new file mode 100644
index 0000000000..2c4de3ac0e
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154
new file mode 100644
index 0000000000..05a0cac912
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc
new file mode 100644
index 0000000000..931ebfb545
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18
new file mode 100644
index 0000000000..e77357601a
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc
new file mode 100644
index 0000000000..32f450dba4
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9
new file mode 100644
index 0000000000..d640fc8498
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc
new file mode 100644
index 0000000000..80317a1515
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc
new file mode 100644
index 0000000000..5320c6c58d
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc
new file mode 100644
index 0000000000..00a9d7720d
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4
new file mode 100644
index 0000000000..ee586c252e
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc
new file mode 100644
index 0000000000..1d7e3d8653
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030
new file mode 100644
index 0000000000..e8d9789f5e
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc
new file mode 100644
index 0000000000..ca57c97e2d
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1
new file mode 100644
index 0000000000..c119dd13ef
Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 differ
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
new file mode 100644
index 0000000000..7f8f8fc2db
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
@@ -0,0 +1,107 @@
+create 'test', 'cf', {SPLITS => ["1", "2", "3", "4", "5", "6", "7", "8", "9"]}
+put 'test','1', 'cf:a', 'value1'
+put 'test','2', 'cf:a', 'value2'
+put 'test','3', 'cf:a', 'value3'
+put 'test','4', 'cf:a', 'value4'
+put 'test','5', 'cf:a', 'value5'
+put 'test','6', 'cf:a', 'value6'
+put 'test','7', 'cf:a', 'value7'
+put 'test','8', 'cf:a', 'value8'
+put 'test','9', 'cf:a', 'value9'
+put 'test','10', 'cf:a', 'value10'
+put 'test','11', 'cf:a', 'value11'
+put 'test','12', 'cf:a', 'value12'
+put 'test','13', 'cf:a', 'value13'
+put 'test','14', 'cf:a', 'value14'
+put 'test','15', 'cf:a', 'value15'
+put 'test','16', 'cf:a', 'value16'
+put 'test','17', 'cf:a', 'value17'
+put 'test','18', 'cf:a', 'value18'
+put 'test','19', 'cf:a', 'value19'
+put 'test','20', 'cf:a', 'value20'
+put 'test','21', 'cf:a', 'value21'
+put 'test','22', 'cf:a', 'value22'
+put 'test','23', 'cf:a', 'value23'
+put 'test','24', 'cf:a', 'value24'
+put 'test','25', 'cf:a', 'value25'
+put 'test','26', 'cf:a', 'value26'
+put 'test','27', 'cf:a', 'value27'
+put 'test','28', 'cf:a', 'value28'
+put 'test','29', 'cf:a', 'value29'
+put 'test','30', 'cf:a', 'value30'
+put 'test','31', 'cf:a', 'value31'
+put 'test','32', 'cf:a', 'value32'
+put 'test','33', 'cf:a', 'value33'
+put 'test','34', 'cf:a', 'value34'
+put 'test','35', 'cf:a', 'value35'
+put 'test','36', 'cf:a', 'value36'
+put 'test','37', 'cf:a', 'value37'
+put 'test','38', 'cf:a', 'value38'
+put 'test','39', 'cf:a', 'value39'
+put 'test','40', 'cf:a', 'value40'
+put 'test','41', 'cf:a', 'value41'
+put 'test','42', 'cf:a', 'value42'
+put 'test','43', 'cf:a', 'value43'
+put 'test','44', 'cf:a', 'value44'
+put 'test','45', 'cf:a', 'value45'
+put 'test','46', 'cf:a', 'value46'
+put 'test','47', 'cf:a', 'value47'
+put 'test','48', 'cf:a', 'value48'
+put 'test','49', 'cf:a', 'value49'
+put 'test','50', 'cf:a', 'value50'
+put 'test','51', 'cf:a', 'value51'
+put 'test','52', 'cf:a', 'value52'
+put 'test','53', 'cf:a', 'value53'
+put 'test','54', 'cf:a', 'value54'
+put 'test','55', 'cf:a', 'value55'
+put 'test','56', 'cf:a', 'value56'
+put 'test','57', 'cf:a', 'value57'
+put 'test','58', 'cf:a', 'value58'
+put 'test','59', 'cf:a', 'value59'
+put 'test','60', 'cf:a', 'value60'
+put 'test','61', 'cf:a', 'value61'
+put 'test','62', 'cf:a', 'value62'
+put 'test','63', 'cf:a', 'value63'
+put 'test','64', 'cf:a', 'value64'
+put 'test','65', 'cf:a', 'value65'
+put 'test','66', 'cf:a', 'value66'
+put 'test','67', 'cf:a', 'value67'
+put 'test','68', 'cf:a', 'value68'
+put 'test','69', 'cf:a', 'value69'
+put 'test','70', 'cf:a', 'value70'
+put 'test','71', 'cf:a', 'value71'
+put 'test','72', 'cf:a', 'value72'
+put 'test','73', 'cf:a', 'value73'
+put 'test','74', 'cf:a', 'value74'
+put 'test','75', 'cf:a', 'value75'
+put 'test','76', 'cf:a', 'value76'
+put 'test','77', 'cf:a', 'value77'
+put 'test','78', 'cf:a', 'value78'
+put 'test','79', 'cf:a', 'value79'
+put 'test','80', 'cf:a', 'value80'
+put 'test','81', 'cf:a', 'value81'
+put 'test','82', 'cf:a', 'value82'
+put 'test','83', 'cf:a', 'value83'
+put 'test','84', 'cf:a', 'value84'
+put 'test','85', 'cf:a', 'value85'
+put 'test','86', 'cf:a', 'value86'
+put 'test','87', 'cf:a', 'value87'
+put 'test','88', 'cf:a', 'value88'
+put 'test','89', 'cf:a', 'value89'
+put 'test','90', 'cf:a', 'value90'
+put 'test','91', 'cf:a', 'value91'
+put 'test','92', 'cf:a', 'value92'
+put 'test','93', 'cf:a', 'value93'
+put 'test','94', 'cf:a', 'value94'
+put 'test','95', 'cf:a', 'value95'
+put 'test','96', 'cf:a', 'value96'
+put 'test','97', 'cf:a', 'value97'
+put 'test','98', 'cf:a', 'value98'
+put 'test','99', 'cf:a', 'value99'
+put 'test','100', 'cf:a', 'value100'
+snapshot 'test', 'test-snapshot'
+list_snapshots
+
+disable 'test'
+drop 'test'
+exit
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFnTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFnTest.java
new file mode 100644
index 0000000000..0183f856f1
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFnTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
+
+import java.util.UUID;
+import org.junit.Test;
+
+public class CleanupHBaseSnapshotRestoreFilesFnTest {
+ private static final String TEST_BUCKET_NAME = "test-bucket";
+ private static final String TEST_SNAPSHOT_PATH = "gs://" + TEST_BUCKET_NAME + "/hbase-export";
+ private static final String TEST_RESTORE_PATH =
+ HBaseSnapshotInputConfigBuilder.RESTORE_DIR + UUID.randomUUID();
+ private static final String TEST_RESTORE_PREFIX = TEST_RESTORE_PATH.substring(1);
+
+ @Test
+ public void testGetWorkingBucketName() {
+ assertEquals(
+ TEST_BUCKET_NAME,
+ CleanupHBaseSnapshotRestoreFilesFn.getWorkingBucketName(TEST_SNAPSHOT_PATH));
+
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ CleanupHBaseSnapshotRestoreFilesFn.getWorkingBucketName(TEST_BUCKET_NAME);
+ });
+ }
+
+ @Test
+ public void testGetListPrefix() {
+ assertEquals(
+ TEST_RESTORE_PREFIX, CleanupHBaseSnapshotRestoreFilesFn.getListPrefix(TEST_RESTORE_PATH));
+
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ CleanupHBaseSnapshotRestoreFilesFn.getWorkingBucketName(TEST_RESTORE_PREFIX);
+ });
+ }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
new file mode 100644
index 0000000000..62f1cdced2
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
@@ -0,0 +1,190 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import com.google.api.services.storage.model.Objects;
+import com.google.cloud.bigtable.beam.sequencefiles.testing.BigtableTableUtils;
+import com.google.cloud.bigtable.hbase.BigtableConfiguration;
+import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
+import java.io.File;
+import java.io.IOException;
+import java.util.UUID;
+import org.apache.beam.runners.dataflow.DataflowRunner;
+import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
+import org.apache.beam.sdk.PipelineResult.State;
+import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
+import org.apache.beam.sdk.extensions.gcp.util.GcsUtil;
+import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/*
+ * End-to-end integration test for the pipeline that imports HBase snapshot data into Cloud Bigtable.
+ * Prepare the test data with gsutil (https://cloud.google.com/storage/docs/quickstart-gsutil):
+ * gsutil -m cp -r /bigtable-dataflow-parent/bigtable-beam-import/src/test/data/ \
+ * gs:///integration-test/
+ *
+ * Set up GCP credentials: https://cloud.google.com/docs/authentication
+ * Ensure your credentials have access to Bigtable and Dataflow.
+ *
+ * Run with:
+ * mvn integration-test -PhbasesnapshotsIntegrationTest \
+ * -Dgoogle.bigtable.project.id= \
+ * -Dgoogle.bigtable.instance.id= \
+ * -Dgoogle.dataflow.stagingLocation=gs:///staging \
+ * -Dcloud.test.data.folder=gs:///integration-test/
+ */
+public class EndToEndIT {
+
+ private final Log LOG = LogFactory.getLog(getClass());
+ private static final String TEST_SNAPSHOT_NAME = "test-snapshot";
+ // Location of test data hosted on Google Cloud Storage, for on-cloud dataflow tests.
+ private static final String CLOUD_TEST_DATA_FOLDER = "cloud.test.data.folder";
+ private static final String DATAFLOW_REGION = "region";
+
+ // Column family name used in all test bigtables.
+ private static final String CF = "cf";
+
+ // Full path of the Cloud Storage folder where dataflow jars are uploaded to.
+ private static final String GOOGLE_DATAFLOW_STAGING_LOCATION = "google.dataflow.stagingLocation";
+
+ private Connection connection;
+ private String projectId;
+ private String instanceId;
+ private String tableId;
+ private String region;
+
+ private GcsUtil gcsUtil;
+ private String dataflowStagingLocation;
+ private String workDir;
+ private byte[][] keySplits;
+
+ // Snapshot data setup
+ private String hbaseSnapshotDir;
+
+ @Before
+ public void setup() throws Exception {
+ projectId = getTestProperty(BigtableOptionsFactory.PROJECT_ID_KEY);
+ instanceId = getTestProperty(BigtableOptionsFactory.INSTANCE_ID_KEY);
+ dataflowStagingLocation = getTestProperty(GOOGLE_DATAFLOW_STAGING_LOCATION);
+ region = getTestProperty(DATAFLOW_REGION);
+ String cloudTestDataFolder = getTestProperty(CLOUD_TEST_DATA_FOLDER);
+ if (!cloudTestDataFolder.endsWith(File.separator)) {
+ cloudTestDataFolder = cloudTestDataFolder + File.separator;
+ }
+
+ hbaseSnapshotDir = cloudTestDataFolder + "data/";
+ UUID test_uuid = UUID.randomUUID();
+
+ // Cloud Storage config
+ GcpOptions gcpOptions = PipelineOptionsFactory.create().as(GcpOptions.class);
+ gcpOptions.setProject(projectId);
+ gcsUtil = new GcsUtil.GcsUtilFactory().create(gcpOptions);
+
+ // Bigtable config
+ connection = BigtableConfiguration.connect(projectId, instanceId);
+ tableId = "test_" + UUID.randomUUID().toString();
+
+ LOG.info("Setting up integration tests");
+
+ String[] keys = new String[] {"1", "2", "3", "4", "5", "6", "7", "8", "9"};
+ keySplits = new byte[keys.length][];
+ for (int i = 0; i < keys.length; i++) {
+ keySplits[i] = keys[i].getBytes();
+ }
+ }
+
+ private static String getTestProperty(String name) {
+ return checkNotNull(System.getProperty(name), "Required property missing: " + name);
+ }
+
+ @After
+ public void teardown() throws IOException {
+ connection.close();
+
+ // delete test table
+ BigtableConfiguration.connect(projectId, instanceId)
+ .getAdmin()
+ .deleteTable(TableName.valueOf(tableId));
+ }
+
+ @Test
+ public void testHBaseSnapshotImport() throws Exception {
+
+ // Create the test table
+ TableName tableName = TableName.valueOf(tableId);
+ HTableDescriptor descriptor = new HTableDescriptor(tableName);
+
+ descriptor.addFamily(new HColumnDescriptor(CF));
+
+ connection.getAdmin().createTable(descriptor, SnapshotTestingUtils.getSplitKeys());
+
+ // Start import
+ DataflowPipelineOptions importPipelineOpts =
+ PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ importPipelineOpts.setRunner(DataflowRunner.class);
+ importPipelineOpts.setGcpTempLocation(dataflowStagingLocation);
+ importPipelineOpts.setNumWorkers(1);
+ importPipelineOpts.setProject(projectId);
+ importPipelineOpts.setRegion(region);
+
+ ImportJobFromHbaseSnapshot.ImportOptions importOpts =
+ importPipelineOpts.as(ImportJobFromHbaseSnapshot.ImportOptions.class);
+
+ // setup GCP and bigtable
+ importOpts.setBigtableProject(StaticValueProvider.of(projectId));
+ importOpts.setBigtableInstanceId(StaticValueProvider.of(instanceId));
+ importOpts.setBigtableTableId(StaticValueProvider.of(tableId));
+
+ // setup HBase snapshot info
+ importOpts.setHbaseSnapshotSourceDir(hbaseSnapshotDir);
+ importOpts.setSnapshotName(TEST_SNAPSHOT_NAME);
+
+ // run pipeline
+ State state = ImportJobFromHbaseSnapshot.buildPipeline(importOpts).run().waitUntilFinish();
+ Assert.assertEquals(State.DONE, state);
+
+ // check data in bigtable
+ BigtableTableUtils destTable = new BigtableTableUtils(connection, tableId, CF);
+ Assert.assertEquals(
+ 100 /* There are 100 rows in the test snapshot */,
+ destTable.readAllCellsFromTable().toArray().length);
+
+ // check that the .restore dir used for temp files has been removed
+ Objects objects =
+ gcsUtil.listObjects(
+ GcsPath.fromUri(hbaseSnapshotDir).getBucket(),
+ CleanupHBaseSnapshotRestoreFilesFn.getListPrefix(
+ HBaseSnapshotInputConfigBuilder.RESTORE_DIR),
+ null);
+ Assert.assertNull(objects.getItems());
+
+ // TODO(vermas2012): Add more validations after this.
+ }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilderTest.java
new file mode 100644
index 0000000000..579a57c238
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilderTest.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.junit.Test;
+
+public class HBaseSnapshotInputConfigBuilderTest {
+
+ private static final String TEST_PROJECT = "test_project";
+ private static final String TEST_SNAPSHOT_DIR = "gs://test-bucket/hbase-export";
+ private static final String TEST_SNAPSHOT_NAME = "test_snapshot";
+
+ @Test
+ public void testBuildingHBaseSnapshotInputConfigBuilder() {
+ Configuration conf =
+ new HBaseSnapshotInputConfigBuilder()
+ .setProjectId(TEST_PROJECT)
+ .setHbaseSnapshotSourceDir(TEST_SNAPSHOT_DIR)
+ .setSnapshotName(TEST_SNAPSHOT_NAME)
+ .createHBaseConfiguration();
+ assertEquals(
+ "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS", conf.get("fs.AbstractFileSystem.gs.impl"));
+ assertEquals(TEST_PROJECT, conf.get("fs.gs.project.id"));
+ assertEquals(TEST_SNAPSHOT_DIR, conf.get("hbase.rootdir"));
+ assertEquals(
+ TableSnapshotInputFormat.class,
+ conf.getClass(
+ "mapreduce.job.inputformat.class", TableSnapshotInputFormat.class, InputFormat.class));
+ }
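+
+ // A minimal extra check (a sketch, not part of the original change): getRestoreDir() should
+ // combine the RESTORE_DIR prefix with the configured suffix.
+ @Test
+ public void testGetRestoreDirUsesSuffix() {
+ HBaseSnapshotInputConfigBuilder builder =
+ new HBaseSnapshotInputConfigBuilder().setRestoreDirSuffix("my-job");
+ assertEquals(HBaseSnapshotInputConfigBuilder.RESTORE_DIR + "my-job", builder.getRestoreDir());
+ }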
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/README.md b/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/README.md
new file mode 100644
index 0000000000..3d9b722bb9
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/README.md
@@ -0,0 +1,18 @@
+# Generating the test HBase snapshot for HBase snapshot import integration tests
+
+The file `generate_test_data.txt` is an HBase shell command sequence
+used to generate the test HBase snapshot data.
+
+If you need to modify the test data used by `bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java`,
+make sure you have HBase installed and its `/bin` directory added to your PATH.
+
+Then:
+
+ $ hbase shell ./generate_test_data.txt
+ $ hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -Dmapreduce.framework.name=local -snapshot test-snapshot -copy-to file:////data
+
+ $ cd
+ $ gsutil -m cp -r ./data/ gs:///integration-test/
+
+After this, you should be able to run the integration test against your new data by specifying
+`-Dcloud.test.data.folder=gs:///integration-test/`, for example:
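+
+For reference, a full invocation (mirroring the command documented in `EndToEndIT.java`; substitute
+your own project, instance, staging bucket and test-data bucket for the elided values) looks like:
+
+ $ mvn integration-test -PhbasesnapshotsIntegrationTest \
+ -Dgoogle.bigtable.project.id= \
+ -Dgoogle.bigtable.instance.id= \
+ -Dgoogle.dataflow.stagingLocation=gs:///staging \
+ -Dcloud.test.data.folder=gs:///integration-test/
+
+(`EndToEndIT` also reads a `region` system property for the Dataflow region.)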
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 790d7a0c06..fb77fefd26 100644
--- a/pom.xml
+++ b/pom.xml
@@ -79,7 +79,7 @@ limitations under the License.
2.24.0
30.1-android
- 20.0
+ 29.0-jre
1.7.4
1.29.0