diff --git a/bigtable-dataflow-parent/bigtable-beam-import/README.md b/bigtable-dataflow-parent/bigtable-beam-import/README.md index abe8037f30..ccbc627603 100644 --- a/bigtable-dataflow-parent/bigtable-beam-import/README.md +++ b/bigtable-dataflow-parent/bigtable-beam-import/README.md @@ -42,4 +42,4 @@ java -jar bigtable-beam-import-1.14.1-shaded.jar import \ --maxNumWorkers=[3x number of nodes] \ --zone=[zone of your cluster] ``` -[//]: # ({x-version-update-end}) \ No newline at end of file +[//]: # ({x-version-update-end}) diff --git a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml index 24de03f557..c35c5738e2 100644 --- a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml +++ b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml @@ -25,7 +25,7 @@ limitations under the License. bigtable-beam-import - com.google.cloud.bigtable.beam.sequencefiles.Main + com.google.cloud.bigtable.beam.Main @@ -49,6 +49,12 @@ limitations under the License. ${project.groupId} bigtable-hbase-beam ${project.version} + + + org.apache.hbase + hbase-shaded-client + + com.google.cloud.bigtable @@ -67,6 +73,10 @@ limitations under the License. io.opencensus * + + org.apache.hbase + hbase-shaded-client + @@ -87,10 +97,18 @@ limitations under the License. beam-sdks-java-io-hadoop-common ${beam.version} + + org.apache.beam + beam-sdks-java-io-hadoop-format + ${beam.version} + + org.apache.hbase - hbase-shaded-client + hbase-shaded-server ${hbase.version} @@ -104,7 +122,7 @@ limitations under the License. com.google.guava guava - ${guava.version} + ${gcs-guava.version} @@ -149,6 +167,21 @@ limitations under the License. slf4j-api ${slf4j.version} + + + com.google.cloud.bigdataoss + gcs-connector + hadoop2-2.1.4 + shaded + + + + + com.google.apis + google-api-services-storage + v1-rev171-1.25.0 + + @@ -181,6 +214,12 @@ limitations under the License. ${junit.version} test + + org.apache.hbase + hbase-shaded-testing-util + ${hbase.version} + test + @@ -265,6 +304,16 @@ limitations under the License. + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + @@ -376,5 +425,33 @@ limitations under the License. + + + hbasesnapshotsIntegrationTest + + + + org.apache.maven.plugins + maven-failsafe-plugin + + + hbasesnapshots-integration-test + + integration-test + + integration-test + + 1 + + **/hbasesnapshots/*IT.java + + false + + + + + + + diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java new file mode 100644 index 0000000000..b346b90837 --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java @@ -0,0 +1,79 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.bigtable.beam; + +import com.google.bigtable.repackaged.com.google.api.core.InternalApi; +import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly; +import com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot; +import com.google.cloud.bigtable.beam.sequencefiles.CreateTableHelper; +import com.google.cloud.bigtable.beam.sequencefiles.ExportJob; +import com.google.cloud.bigtable.beam.sequencefiles.ImportJob; +import java.io.File; +import java.net.URISyntaxException; +import java.util.Arrays; + +/** Entry point for create-table/import/export job submission. */ +@InternalExtensionOnly +final class Main { + /** For internal use only - public for technical reasons. */ + @InternalApi("For internal usage only") + public Main() {} + + public static void main(String[] args) throws Exception { + if (args.length < 1) { + usage(); + System.exit(1); + } + + String[] subArgs = Arrays.copyOfRange(args, 1, args.length); + + switch (args[0]) { + case "export": + ExportJob.main(subArgs); + break; + case "import": + ImportJob.main(subArgs); + break; + case "importsnapshot": + ImportJobFromHbaseSnapshot.main(subArgs); + break; + case "create-table": + CreateTableHelper.main(subArgs); + break; + default: + usage(); + System.exit(1); + } + } + + private static void usage() { + String jarName; + + try { + jarName = + new File(Main.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()) + .getName(); + } catch (URISyntaxException e) { + jarName = ""; + } + + System.out.printf( + "java -jar %s \n" + + "Where can be 'export', 'import' , 'importsnapshot' or 'create-table'. To get further help, run: \n" + + "java -jar %s --help\n", + jarName, jarName); + } +} diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFn.java new file mode 100644 index 0000000000..e0bdca69d5 --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFn.java @@ -0,0 +1,84 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.bigtable.beam.hbasesnapshots; + +import com.google.api.services.storage.model.Objects; +import com.google.common.base.Preconditions; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; +import org.apache.beam.sdk.extensions.gcp.util.GcsUtil; +import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.values.KV; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * A {@link DoFn} that could be used for cleaning up temp files generated during HBase snapshot + * scans in Google Cloud Storage(GCS) bucket via GCS connector. + */ +class CleanupHBaseSnapshotRestoreFilesFn extends DoFn, Boolean> { + private static final Log LOG = LogFactory.getLog(CleanupHBaseSnapshotRestoreFilesFn.class); + + @ProcessElement + public void processElement(ProcessContext context) throws IOException { + KV elem = context.element(); + + String hbaseSnapshotDir = elem.getKey(); + String restorePath = elem.getValue(); + String prefix = getListPrefix(restorePath); + String bucketName = getWorkingBucketName(hbaseSnapshotDir); + Preconditions.checkState( + !prefix.isEmpty() && !hbaseSnapshotDir.contains(String.format("%s/%s", bucketName, prefix)), + "restore folder should not be empty or a subfolder of hbaseSnapshotSourceDir"); + GcpOptions gcpOptions = context.getPipelineOptions().as(GcpOptions.class); + GcsUtil gcsUtil = new GcsUtil.GcsUtilFactory().create(gcpOptions); + + String pageToken = null; + List results = new ArrayList<>(); + do { + Objects objects = gcsUtil.listObjects(bucketName, prefix, pageToken); + if (objects.getItems() == null) { + break; + } + + objects.getItems().stream() + .map(storageObject -> GcsPath.fromObject(storageObject).toString()) + .forEach(results::add); + pageToken = objects.getNextPageToken(); + } while (pageToken != null); + gcsUtil.remove(results); + context.output(true); + } + + public static String getWorkingBucketName(String hbaseSnapshotDir) { + Preconditions.checkArgument( + hbaseSnapshotDir.startsWith(GcsPath.SCHEME), + "snapshot folder must be hosted in a GCS bucket "); + + return GcsPath.fromUri(hbaseSnapshotDir).getBucket(); + } + // getListPrefix convert absolute restorePath in a Hadoop filesystem + // to a match prefix in a GCS bucket + public static String getListPrefix(String restorePath) { + Preconditions.checkArgument( + restorePath.startsWith("/"), + "restore folder must be an absolute path in current filesystem"); + return restorePath.substring(1); + } +} diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilder.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilder.java new file mode 100644 index 0000000000..63ebddb20a --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilder.java @@ -0,0 +1,140 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.beam.hbasesnapshots; + +import com.google.common.base.Preconditions; +import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.mapreduce.TableInputFormat; +import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat; +import org.apache.hadoop.hbase.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; +import org.apache.hadoop.hbase.util.Base64; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.Job; + +/** + * A {@link Configuration} that could be used in {@link HadoopFormatIO} for reading HBase snapshot + * hosted in Google Cloud Storage(GCS) bucket via GCS connector. It uses {@link + * TableSnapshotInputFormat} for reading HBase snapshots. + */ +class HBaseSnapshotInputConfigBuilder { + + private static final Log LOG = LogFactory.getLog(HBaseSnapshotInputConfigBuilder.class); + // Batch size used for HBase snapshot scans + private static final int BATCH_SIZE = 1000; + + // a temp location to store metadata extracted from snapshot + public static final String RESTORE_DIR = "/.restore"; + + private String projectId; + private String hbaseSnapshotSourceDir; + private String snapshotName; + private String restoreDirSuffix; + + public HBaseSnapshotInputConfigBuilder() {} + + /* + * Set the project id use to access the GCS bucket with HBase snapshot data to be imported + */ + public HBaseSnapshotInputConfigBuilder setProjectId(String projectId) { + this.projectId = projectId; + return this; + } + + /* + * Set the GCS path where the HBase snapshot data is located + */ + public HBaseSnapshotInputConfigBuilder setHbaseSnapshotSourceDir(String hbaseSnapshotSourceDir) { + this.hbaseSnapshotSourceDir = hbaseSnapshotSourceDir; + return this; + } + + /* + * Set the name of the snapshot to be imported + * e.g when importing snapshot 'gs:///hbase-export/table_snapshot' + * put 'table_snapshot' as the {@code snapshotName} + * and 'gs:///hbase-export' as {@code exportedSnapshotDir} + */ + public HBaseSnapshotInputConfigBuilder setSnapshotName(String snapshotName) { + this.snapshotName = snapshotName; + return this; + } + + /* + * Set the unique suffix to be used for restore folder to avoid conflicts + */ + public HBaseSnapshotInputConfigBuilder setRestoreDirSuffix(String suffix) { + this.restoreDirSuffix = suffix; + return this; + } + + public String getRestoreDir() { + return RESTORE_DIR + this.restoreDirSuffix; + } + + public Configuration build() throws Exception { + Preconditions.checkNotNull(projectId, "Required value projectId must be set"); + Preconditions.checkNotNull( + hbaseSnapshotSourceDir, "Required value 
hbaseSnapshotSourceDir must be set"); + Preconditions.checkNotNull(snapshotName, "Required value snapshotName must be set"); + Preconditions.checkState( + hbaseSnapshotSourceDir.startsWith(GcsPath.SCHEME), + "snapshot folder must be hosted in a GCS bucket "); + + Configuration conf = createHBaseConfiguration(); + + // Configuring a MapReduce Job base on HBaseConfiguration + // and return the job Configuration + ClientProtos.Scan proto = ProtobufUtil.toScan(new Scan().setBatch(BATCH_SIZE)); + conf.set(TableInputFormat.SCAN, Base64.encodeBytes(proto.toByteArray())); + Job job = Job.getInstance(conf); // creates internal clone of hbaseConf + // the restore folder need to under current bucket root so to be considered + // within the same filesystem with the hbaseSnapshotSourceDir + TableSnapshotInputFormat.setInput(job, snapshotName, new Path(getRestoreDir())); + return job.getConfiguration(); // extract the modified clone + } + + // separate static part for unit testing + public Configuration createHBaseConfiguration() { + Configuration conf = HBaseConfiguration.create(); + + // Setup the input data location for HBase snapshot import + // exportedSnapshotDir should be a GCS Bucket path. + conf.set("hbase.rootdir", hbaseSnapshotSourceDir); + conf.set("fs.defaultFS", hbaseSnapshotSourceDir); + + // Setup GCS connector to use GCS as Hadoop filesystem + conf.set("fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS"); + conf.set("fs.gs.project.id", projectId); + conf.setBoolean("google.cloud.auth.service.account.enable", true); + + // Setup MapReduce config for TableSnapshotInputFormat + conf.setClass( + "mapreduce.job.inputformat.class", TableSnapshotInputFormat.class, InputFormat.class); + conf.setClass("key.class", ImmutableBytesWritable.class, Writable.class); + conf.setClass("value.class", Result.class, Object.class); + return conf; + } +} diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java new file mode 100644 index 0000000000..2d8ce7c31f --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java @@ -0,0 +1,133 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.bigtable.beam.hbasesnapshots; + +import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly; +import com.google.cloud.bigtable.beam.CloudBigtableIO; +import com.google.cloud.bigtable.beam.TemplateUtils; +import com.google.cloud.bigtable.beam.sequencefiles.HBaseResultToMutationFn; +import com.google.cloud.bigtable.beam.sequencefiles.ImportJob; +import com.google.cloud.bigtable.beam.sequencefiles.Utils; +import com.google.common.annotations.VisibleForTesting; +import java.util.Arrays; +import java.util.List; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.PipelineResult; +import org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.Wait; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; + +/** + * A job that imports data from HBase snapshot exports hosted in Cloud Storage bucket into Cloud + * Bigtable. + * + *

Example: If you have exported your HBase snapshot to the GCS bucket gs://$HBASE_EXPORT_ROOT_PATH + * and want to import the snapshot gs://$HBASE_EXPORT_ROOT_PATH/.hbase-snapshot/$SNAPSHOT_NAME into + * the Cloud Bigtable table $TABLE in instance $INSTANCE, execute the following command to run the job directly: + * + *

+ * mvn compile exec:java \
+ *   -DmainClass=com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot \
+ *   -Dexec.args="--runner=DataflowRunner \
+ *                --stagingLocation=gs://$STAGING_PATH \
+ *                --project=$PROJECT \
+ *                --bigtableInstanceId=$INSTANCE \
+ *                --bigtableTableId=$TABLE \
+ *                --hbaseSnapshotSourceDir=gs://$HBASE_EXPORT_ROOT_PATH \
+ *                --snapshotName=$SNAPSHOT_NAME
+ * 
+ * + * Note that in the case of job failures, the temp files generated in the .restore-$JOB_NAME + * directory under the snapshot export bucket will not get deleted. Hence one needs to either launch + * a replacement job with the same jobName to re-run the job, or manually delete this directory. + */ +@InternalExtensionOnly +public class ImportJobFromHbaseSnapshot { + private static final Log LOG = LogFactory.getLog(ImportJobFromHbaseSnapshot.class); + + public interface ImportOptions extends ImportJob.ImportOptions { + @Description("The HBase root dir where HBase snapshot files reside.") + String getHbaseSnapshotSourceDir(); + + @SuppressWarnings("unused") + void setHbaseSnapshotSourceDir(String hbaseSnapshotSourceDir); + + @Description("Snapshot name") + String getSnapshotName(); + + @SuppressWarnings("unused") + void setSnapshotName(String snapshotName); + } + + public static void main(String[] args) throws Exception { + PipelineOptionsFactory.register(ImportOptions.class); + + ImportOptions opts = + PipelineOptionsFactory.fromArgs(args).withValidation().as(ImportOptions.class); + + LOG.info("Building Pipeline"); + Pipeline pipeline = buildPipeline(opts); + LOG.info("Running Pipeline"); + PipelineResult result = pipeline.run(); + + if (opts.getWait()) { + Utils.waitForPipelineToFinish(result); + } + } + + @VisibleForTesting + static Pipeline buildPipeline(ImportOptions opts) throws Exception { + + Pipeline pipeline = Pipeline.create(Utils.tweakOptions(opts)); + HBaseSnapshotInputConfigBuilder configurationBuilder = + new HBaseSnapshotInputConfigBuilder() + .setProjectId(opts.getProject()) + .setHbaseSnapshotSourceDir(opts.getHbaseSnapshotSourceDir()) + .setSnapshotName(opts.getSnapshotName()) + .setRestoreDirSuffix(opts.getJobName()); + PCollection<KV<ImmutableBytesWritable, Result>> readResult = + pipeline.apply( + "Read from HBase Snapshot", + HadoopFormatIO.<ImmutableBytesWritable, Result>read() + .withConfiguration(configurationBuilder.build())); + + readResult + .apply("Create Mutations", ParDo.of(new HBaseResultToMutationFn())) + .apply( + "Write to Bigtable", + CloudBigtableIO.writeToTable(TemplateUtils.BuildImportConfig(opts))); + + final List<KV<String, String>> sourceAndRestoreFolders = + Arrays.asList( + KV.of(opts.getHbaseSnapshotSourceDir(), configurationBuilder.getRestoreDir())); + pipeline + .apply(Create.of(sourceAndRestoreFolders)) + .apply(Wait.on(readResult)) + .apply(ParDo.of(new CleanupHBaseSnapshotRestoreFilesFn())); + + return pipeline; + } +} diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java index b4b3862817..4c794ed7eb 100644 --- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java @@ -57,7 +57,7 @@ * intended to be a preparation step before running an {@link ImportJob}. 
*/ @InternalApi -class CreateTableHelper { +public class CreateTableHelper { private static final Log LOG = LogFactory.getLog(CreateTableHelper.class); @InternalApi diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java index 6b2e628a5d..45954c7762 100644 --- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java @@ -15,6 +15,7 @@ */ package com.google.cloud.bigtable.beam.sequencefiles; +import com.google.bigtable.repackaged.com.google.api.core.InternalApi; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Predicate; @@ -43,7 +44,8 @@ * A {@link DoFn} function that converts a {@link Result} in the pipeline input to a {@link * Mutation} for output. */ -class HBaseResultToMutationFn extends DoFn, Mutation> { +@InternalApi +public class HBaseResultToMutationFn extends DoFn, Mutation> { private static Logger logger = LoggerFactory.getLogger(HBaseResultToMutationFn.class); private static final long serialVersionUID = 1L; diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java index 142c4a17ef..27e8f3debf 100644 --- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java @@ -15,6 +15,7 @@ */ package com.google.cloud.bigtable.beam.sequencefiles; +import com.google.bigtable.repackaged.com.google.api.core.InternalApi; import org.apache.beam.runners.dataflow.DataflowRunner; import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; import org.apache.beam.sdk.PipelineResult; @@ -28,7 +29,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -class Utils { +@InternalApi +public class Utils { private static final Log LOG = LogFactory.getLog(Utils.class); /** @@ -74,7 +76,7 @@ public ResourceId apply(String input) { * * @param result */ - static void waitForPipelineToFinish(PipelineResult result) { + public static void waitForPipelineToFinish(PipelineResult result) { try { // Check to see if we are creating a template. // This should throw {@link UnsupportedOperationException} when creating a template. 
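The import pipeline added above wires its cleanup step through Beam's `Wait.on`, so the temporary restore files are only deleted after the snapshot read has completed. Below is a minimal, self-contained sketch of that ordering pattern; the class and DoFn names (`WaitOnOrderingSketch`, `MainWorkFn`, `CleanupFn`) are illustrative stand-ins and are not part of this change.

```java
import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.Wait;
import org.apache.beam.sdk.values.PCollection;

public class WaitOnOrderingSketch {

  /** Stand-in for the main work (the HadoopFormatIO snapshot read in the real job). */
  static class MainWorkFn extends DoFn<String, String> {
    @ProcessElement
    public void processElement(@Element String element, OutputReceiver<String> out) {
      out.output(element.toUpperCase());
    }
  }

  /** Stand-in for CleanupHBaseSnapshotRestoreFilesFn; runs only after the main work finishes. */
  static class CleanupFn extends DoFn<String, Void> {
    @ProcessElement
    public void processElement(@Element String restoreDir) {
      System.out.println("Deleting temporary files under " + restoreDir);
    }
  }

  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    PCollection<String> mainOutput =
        pipeline
            .apply("Source", Create.of(Arrays.asList("row-1", "row-2")))
            .apply("MainWork", ParDo.of(new MainWorkFn()));

    // Wait.on(mainOutput) holds the cleanup input back until mainOutput is complete,
    // which is how the import job keeps the restore dir alive for the whole snapshot read.
    pipeline
        .apply("CleanupInput", Create.of("/.restore-myjob"))
        .apply("WaitForMainWork", Wait.on(mainOutput))
        .apply("Cleanup", ParDo.of(new CleanupFn()));

    pipeline.run().waitUntilFinish();
  }
}
```

The same structure appears in `ImportJobFromHbaseSnapshot.buildPipeline`: the single-element cleanup input carries the snapshot source dir and restore dir, and the cleanup DoFn is gated on the read result.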
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc new file mode 100644 index 0000000000..8fe4533a01 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc new file mode 100644 index 0000000000..1467a17f1f Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo new file mode 100644 index 0000000000..83e482aac0 --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo @@ -0,0 +1,2 @@ + + test-snapshottestÙ÷¤×ä. ( \ No newline at end of file diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest new file mode 100644 index 0000000000..180516dc03 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc new file mode 100644 index 0000000000..ea5b25e778 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb new file mode 100644 index 0000000000..dc8da56c10 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc new file mode 100644 index 0000000000..51cacdd03b Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc differ diff --git 
a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7 new file mode 100644 index 0000000000..cbd9f539b3 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7 differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc new file mode 100644 index 0000000000..2c4de3ac0e Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154 new file mode 100644 index 0000000000..05a0cac912 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154 differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc new file mode 100644 index 0000000000..931ebfb545 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18 new file mode 100644 index 0000000000..e77357601a Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18 differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc new file mode 100644 index 0000000000..32f450dba4 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc differ diff --git 
a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 new file mode 100644 index 0000000000..d640fc8498 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc new file mode 100644 index 0000000000..80317a1515 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc new file mode 100644 index 0000000000..5320c6c58d Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc new file mode 100644 index 0000000000..00a9d7720d Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4 new file mode 100644 index 0000000000..ee586c252e Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4 differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc new file mode 100644 index 0000000000..1d7e3d8653 Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc differ diff --git 
a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 new file mode 100644 index 0000000000..e8d9789f5e Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc new file mode 100644 index 0000000000..ca57c97e2d Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 new file mode 100644 index 0000000000..c119dd13ef Binary files /dev/null and b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 differ diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt new file mode 100644 index 0000000000..7f8f8fc2db --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt @@ -0,0 +1,107 @@ +create 'test', 'cf', {SPLITS => ["1", "2", "3", "4", "5", "6", "7", "8", "9"]} +put 'test','1', 'cf:a', 'value1' +put 'test','2', 'cf:a', 'value2' +put 'test','3', 'cf:a', 'value3' +put 'test','4', 'cf:a', 'value4' +put 'test','5', 'cf:a', 'value5' +put 'test','6', 'cf:a', 'value6' +put 'test','7', 'cf:a', 'value7' +put 'test','8', 'cf:a', 'value8' +put 'test','9', 'cf:a', 'value9' +put 'test','10', 'cf:a', 'value10' +put 'test','11', 'cf:a', 'value11' +put 'test','12', 'cf:a', 'value12' +put 'test','13', 'cf:a', 'value13' +put 'test','14', 'cf:a', 'value14' +put 'test','15', 'cf:a', 'value15' +put 'test','16', 'cf:a', 'value16' +put 'test','17', 'cf:a', 'value17' +put 'test','18', 'cf:a', 'value18' +put 'test','19', 'cf:a', 'value19' +put 'test','20', 'cf:a', 'value20' +put 'test','21', 'cf:a', 'value21' +put 'test','22', 'cf:a', 'value22' +put 'test','23', 'cf:a', 'value23' +put 'test','24', 'cf:a', 'value24' +put 'test','25', 'cf:a', 'value25' +put 'test','26', 'cf:a', 'value26' +put 'test','27', 'cf:a', 'value27' +put 'test','28', 'cf:a', 'value28' +put 'test','29', 'cf:a', 'value29' +put 'test','30', 'cf:a', 'value30' +put 'test','31', 'cf:a', 'value31' +put 'test','32', 'cf:a', 'value32' +put 'test','33', 'cf:a', 'value33' +put 'test','34', 'cf:a', 'value34' +put 'test','35', 'cf:a', 'value35' +put 'test','36', 'cf:a', 'value36' +put 'test','37', 'cf:a', 'value37' +put 'test','38', 'cf:a', 'value38' +put 'test','39', 'cf:a', 'value39' +put 'test','40', 'cf:a', 'value40' +put 
'test','41', 'cf:a', 'value41' +put 'test','42', 'cf:a', 'value42' +put 'test','43', 'cf:a', 'value43' +put 'test','44', 'cf:a', 'value44' +put 'test','45', 'cf:a', 'value45' +put 'test','46', 'cf:a', 'value46' +put 'test','47', 'cf:a', 'value47' +put 'test','48', 'cf:a', 'value48' +put 'test','49', 'cf:a', 'value49' +put 'test','50', 'cf:a', 'value50' +put 'test','51', 'cf:a', 'value51' +put 'test','52', 'cf:a', 'value52' +put 'test','53', 'cf:a', 'value53' +put 'test','54', 'cf:a', 'value54' +put 'test','55', 'cf:a', 'value55' +put 'test','56', 'cf:a', 'value56' +put 'test','57', 'cf:a', 'value57' +put 'test','58', 'cf:a', 'value58' +put 'test','59', 'cf:a', 'value59' +put 'test','60', 'cf:a', 'value60' +put 'test','61', 'cf:a', 'value61' +put 'test','62', 'cf:a', 'value62' +put 'test','63', 'cf:a', 'value63' +put 'test','64', 'cf:a', 'value64' +put 'test','65', 'cf:a', 'value65' +put 'test','66', 'cf:a', 'value66' +put 'test','67', 'cf:a', 'value67' +put 'test','68', 'cf:a', 'value68' +put 'test','69', 'cf:a', 'value69' +put 'test','70', 'cf:a', 'value70' +put 'test','71', 'cf:a', 'value71' +put 'test','72', 'cf:a', 'value72' +put 'test','73', 'cf:a', 'value73' +put 'test','74', 'cf:a', 'value74' +put 'test','75', 'cf:a', 'value75' +put 'test','76', 'cf:a', 'value76' +put 'test','77', 'cf:a', 'value77' +put 'test','78', 'cf:a', 'value78' +put 'test','79', 'cf:a', 'value79' +put 'test','80', 'cf:a', 'value80' +put 'test','81', 'cf:a', 'value81' +put 'test','82', 'cf:a', 'value82' +put 'test','83', 'cf:a', 'value83' +put 'test','84', 'cf:a', 'value84' +put 'test','85', 'cf:a', 'value85' +put 'test','86', 'cf:a', 'value86' +put 'test','87', 'cf:a', 'value87' +put 'test','88', 'cf:a', 'value88' +put 'test','89', 'cf:a', 'value89' +put 'test','90', 'cf:a', 'value90' +put 'test','91', 'cf:a', 'value91' +put 'test','92', 'cf:a', 'value92' +put 'test','93', 'cf:a', 'value93' +put 'test','94', 'cf:a', 'value94' +put 'test','95', 'cf:a', 'value95' +put 'test','96', 'cf:a', 'value96' +put 'test','97', 'cf:a', 'value97' +put 'test','98', 'cf:a', 'value98' +put 'test','99', 'cf:a', 'value99' +put 'test','100', 'cf:a', 'value100' +snapshot 'test', 'test-snapshot' +list_snapshots + +disable 'test' +drop 'test' +exit diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFnTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFnTest.java new file mode 100644 index 0000000000..0183f856f1 --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/CleanupHBaseSnapshotRestoreFilesFnTest.java @@ -0,0 +1,55 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.bigtable.beam.hbasesnapshots; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; + +import java.util.UUID; +import org.junit.Test; + +public class CleanupHBaseSnapshotRestoreFilesFnTest { + private static final String TEST_BUCKET_NAME = "test-bucket"; + private static final String TEST_SNAPSHOT_PATH = "gs://" + TEST_BUCKET_NAME + "/hbase-export"; + private static final String TEST_RESTORE_PATH = + HBaseSnapshotInputConfigBuilder.RESTORE_DIR + UUID.randomUUID(); + private static final String TEST_RESTORE_PREFIX = TEST_RESTORE_PATH.substring(1); + + @Test + public void testGetWorkingBucketName() { + assertEquals( + TEST_BUCKET_NAME, + CleanupHBaseSnapshotRestoreFilesFn.getWorkingBucketName(TEST_SNAPSHOT_PATH)); + + assertThrows( + IllegalArgumentException.class, + () -> { + CleanupHBaseSnapshotRestoreFilesFn.getWorkingBucketName(TEST_BUCKET_NAME); + }); + } + + @Test + public void testGetListPrefix() { + assertEquals( + TEST_RESTORE_PREFIX, CleanupHBaseSnapshotRestoreFilesFn.getListPrefix(TEST_RESTORE_PATH)); + + assertThrows( + IllegalArgumentException.class, + () -> { + CleanupHBaseSnapshotRestoreFilesFn.getWorkingBucketName(TEST_RESTORE_PREFIX); + }); + } +} diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java new file mode 100644 index 0000000000..62f1cdced2 --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java @@ -0,0 +1,190 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.bigtable.beam.hbasesnapshots; + +import static com.google.common.base.Preconditions.checkNotNull; + +import com.google.api.services.storage.model.Objects; +import com.google.cloud.bigtable.beam.sequencefiles.testing.BigtableTableUtils; +import com.google.cloud.bigtable.hbase.BigtableConfiguration; +import com.google.cloud.bigtable.hbase.BigtableOptionsFactory; +import java.io.File; +import java.io.IOException; +import java.util.UUID; +import org.apache.beam.runners.dataflow.DataflowRunner; +import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; +import org.apache.beam.sdk.PipelineResult.State; +import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; +import org.apache.beam.sdk.extensions.gcp.util.GcsUtil; +import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/* + * End to end integration test for pipeline that import HBase snapshot data into Cloud Bigtable. + * Prepare test data with gsutil(https://cloud.google.com/storage/docs/quickstart-gsutil): + * gsutil -m cp -r /bigtable-dataflow-parent/bigtable-beam-import/src/test/data/ \ + * gs:///integration-test/ + * + * Setup GCP credential: https://cloud.google.com/docs/authentication + * Ensure your credential have access to Bigtable and Dataflow + * + * Run with: + * mvn integration-test -PhbasesnapshotsIntegrationTest \ + * -Dgoogle.bigtable.project.id= \ + * -Dgoogle.bigtable.instance.id= \ + * -Dgoogle.dataflow.stagingLocation=gs:///staging \ + * -Dcloud.test.data.folder=gs:///integration-test/ + */ +public class EndToEndIT { + + private final Log LOG = LogFactory.getLog(getClass()); + private static final String TEST_SNAPSHOT_NAME = "test-snapshot"; + // Location of test data hosted on Google Cloud Storage, for on-cloud dataflow tests. + private static final String CLOUD_TEST_DATA_FOLDER = "cloud.test.data.folder"; + private static final String DATAFLOW_REGION = "region"; + + // Column family name used in all test bigtables. + private static final String CF = "cf"; + + // Full path of the Cloud Storage folder where dataflow jars are uploaded to. 
+ private static final String GOOGLE_DATAFLOW_STAGING_LOCATION = "google.dataflow.stagingLocation"; + + private Connection connection; + private String projectId; + private String instanceId; + private String tableId; + private String region; + + private GcsUtil gcsUtil; + private String dataflowStagingLocation; + private String workDir; + private byte[][] keySplits; + + // Snapshot data setup + private String hbaseSnapshotDir; + + @Before + public void setup() throws Exception { + projectId = getTestProperty(BigtableOptionsFactory.PROJECT_ID_KEY); + instanceId = getTestProperty(BigtableOptionsFactory.INSTANCE_ID_KEY); + dataflowStagingLocation = getTestProperty(GOOGLE_DATAFLOW_STAGING_LOCATION); + region = getTestProperty(DATAFLOW_REGION); + String cloudTestDataFolder = getTestProperty(CLOUD_TEST_DATA_FOLDER); + if (!cloudTestDataFolder.endsWith(File.separator)) { + cloudTestDataFolder = cloudTestDataFolder + File.separator; + } + + hbaseSnapshotDir = cloudTestDataFolder + "data/"; + UUID test_uuid = UUID.randomUUID(); + + // Cloud Storage config + GcpOptions gcpOptions = PipelineOptionsFactory.create().as(GcpOptions.class); + gcpOptions.setProject(projectId); + gcsUtil = new GcsUtil.GcsUtilFactory().create(gcpOptions); + + // Bigtable config + connection = BigtableConfiguration.connect(projectId, instanceId); + tableId = "test_" + UUID.randomUUID().toString(); + + LOG.info("Setting up integration tests"); + + String[] keys = new String[] {"1", "2", "3", "4", "5", "6", "7", "8", "9"}; + keySplits = new byte[keys.length][]; + for (int i = 0; i < keys.length; i++) { + keySplits[i] = keys[i].getBytes(); + } + } + + private static String getTestProperty(String name) { + return checkNotNull(System.getProperty(name), "Required property missing: " + name); + } + + @After + public void teardown() throws IOException { + connection.close(); + + // delete test table + BigtableConfiguration.connect(projectId, instanceId) + .getAdmin() + .deleteTable(TableName.valueOf(tableId)); + } + + @Test + public void testHBaseSnapshotImport() throws Exception { + + // Crete table + TableName tableName = TableName.valueOf(tableId); + HTableDescriptor descriptor = new HTableDescriptor(tableName); + + descriptor.addFamily(new HColumnDescriptor(CF)); + + connection.getAdmin().createTable(descriptor, SnapshotTestingUtils.getSplitKeys()); + + // Start import + DataflowPipelineOptions importPipelineOpts = + PipelineOptionsFactory.as(DataflowPipelineOptions.class); + importPipelineOpts.setRunner(DataflowRunner.class); + importPipelineOpts.setGcpTempLocation(dataflowStagingLocation); + importPipelineOpts.setNumWorkers(1); + importPipelineOpts.setProject(projectId); + importPipelineOpts.setRegion(region); + + ImportJobFromHbaseSnapshot.ImportOptions importOpts = + importPipelineOpts.as(ImportJobFromHbaseSnapshot.ImportOptions.class); + + // setup GCP and bigtable + importOpts.setBigtableProject(StaticValueProvider.of(projectId)); + importOpts.setBigtableInstanceId(StaticValueProvider.of(instanceId)); + importOpts.setBigtableTableId(StaticValueProvider.of(tableId)); + + // setup HBase snapshot info + importOpts.setHbaseSnapshotSourceDir(hbaseSnapshotDir); + importOpts.setSnapshotName(TEST_SNAPSHOT_NAME); + + // run pipeline + State state = ImportJobFromHbaseSnapshot.buildPipeline(importOpts).run().waitUntilFinish(); + Assert.assertEquals(State.DONE, state); + + // check data in bigtable + BigtableTableUtils destTable = new BigtableTableUtils(connection, tableId, CF); + Assert.assertEquals( + 100 /* There are 100 rows in 
test snapshot*/, + destTable.readAllCellsFromTable().toArray().length); + + // check that the .restore dir used for temp files has been removed + Objects objects = + gcsUtil.listObjects( + GcsPath.fromUri(hbaseSnapshotDir).getBucket(), + CleanupHBaseSnapshotRestoreFilesFn.getListPrefix( + HBaseSnapshotInputConfigBuilder.RESTORE_DIR), + null); + Assert.assertNull(objects.getItems()); + + // TODO(vermas2012): Add more validations after this. + } +} diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilderTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilderTest.java new file mode 100644 index 0000000000..579a57c238 --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigBuilderTest.java @@ -0,0 +1,48 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.beam.hbasesnapshots; + +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat; +import org.apache.hadoop.mapreduce.InputFormat; +import org.junit.Test; + +public class HBaseSnapshotInputConfigBuilderTest { + + private static final String TEST_PROJECT = "test_project"; + private static final String TEST_SNAPSHOT_DIR = "gs://test-bucket/hbase-export"; + private static final String TEST_SNAPSHOT_NAME = "test_snapshot"; + + @Test + public void testBuildingHBaseSnapshotInputConfigBuilder() { + Configuration conf = + new HBaseSnapshotInputConfigBuilder() + .setProjectId(TEST_PROJECT) + .setHbaseSnapshotSourceDir(TEST_SNAPSHOT_DIR) + .setSnapshotName(TEST_SNAPSHOT_NAME) + .createHBaseConfiguration(); + assertEquals( + "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS", conf.get("fs.AbstractFileSystem.gs.impl")); + assertEquals(TEST_PROJECT, conf.get("fs.gs.project.id")); + assertEquals(TEST_SNAPSHOT_DIR, conf.get("hbase.rootdir")); + assertEquals( + TableSnapshotInputFormat.class, + conf.getClass( + "mapreduce.job.inputformat.class", TableSnapshotInputFormat.class, InputFormat.class)); + } +} diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/README.md b/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/README.md new file mode 100644 index 0000000000..3d9b722bb9 --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/resources/README.md @@ -0,0 +1,18 @@ +# Generating the test HBase snapshot for HBase snapshot import integration tests + +The file `generate_test_data.txt` is an HBase command line command sequence +used to generated the testing HBase snapshot data. 
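If you prefer to generate the row data programmatically rather than through the HBase shell, a rough equivalent using the HBase Java client is sketched below. This assumes a locally running, already-configured HBase; creating the table with its splits and taking the snapshot still happen as in `generate_test_data.txt`.

```java
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class GenerateTestData {
  public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    try (Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf("test"))) {
      // Same data as generate_test_data.txt: row keys "1".."100", one cell each in cf:a.
      for (int i = 1; i <= 100; i++) {
        Put put = new Put(Bytes.toBytes(Integer.toString(i)));
        put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("a"), Bytes.toBytes("value" + i));
        table.put(put);
      }
    }
  }
}
```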
+ +If you need to modify the test data used by `bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java`, +please make sure you have HBase installed and add the HBase `bin` directory to your PATH. + +Then: + + $ hbase shell ./generate_test_data.txt + $ hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -Dmapreduce.framework.name=local -snapshot test-snapshot -copy-to file:////data + + $ cd + $ gsutil -m cp -r ./data/ gs:///integration-test/ + +After this, you should be able to run the integration test with your new data by specifying +`-Dcloud.test.data.folder=gs:///integration-test/` \ No newline at end of file diff --git a/pom.xml b/pom.xml index 790d7a0c06..fb77fefd26 100644 --- a/pom.xml +++ b/pom.xml @@ -79,7 +79,7 @@ limitations under the License. 2.24.0 30.1-android - 20.0 + 29.0-jre 1.7.4 1.29.0
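Once the import has run (via the integration test or a manual Dataflow invocation), one way to spot-check the result is to scan the destination table back with the bigtable-hbase client. The sketch below uses placeholder project, instance, and table names and is not part of the change itself.

```java
import com.google.cloud.bigtable.hbase.BigtableConfiguration;
import java.io.IOException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;

public class VerifySnapshotImport {
  public static void main(String[] args) throws IOException {
    // Placeholder names; substitute your own project, instance, and table.
    try (Connection connection = BigtableConfiguration.connect("my-project", "my-instance");
        Table table = connection.getTable(TableName.valueOf("test"))) {
      int rowCount = 0;
      try (ResultScanner scanner = table.getScanner(new Scan())) {
        for (Result ignored : scanner) {
          rowCount++;
        }
      }
      // The snapshot generated by generate_test_data.txt contains 100 rows.
      System.out.println("Rows found after import: " + rowCount);
    }
  }
}
```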