From b99c98accae463f3e8e01dbcf9b7be97168c5858 Mon Sep 17 00:00:00 2001
From: Cheng Li
Date: Tue, 20 Oct 2020 19:48:50 +0000
Subject: [PATCH] Support import from HBase snapshot

New files for configuring HBaseSnapshotInputFormat

resolve version conflict and upgrade Beam version to 2.24.0

revert disk option change, not enough quota

Code reorg

code reduction

Refactor naming

Add integration config

Add unit test for HBaseSnapshotInputConfiguration

Set up skeleton for integration testing

Ship test data with code, integration tests pass

Clean up code for PR

Add HBase commands that generate our test snapshot
---
 .../bigtable-beam-import/pom.xml              |  75 ++++++-
 .../beam/{sequencefiles => }/Main.java        |  11 +-
 .../HBaseSnapshotInputConfiguration.java      |  95 +++++++++
 .../ImportJobFromHbaseSnapshot.java           | 152 ++++++++++++++
 .../beam/sequencefiles/CreateTableHelper.java |   2 +-
 .../HBaseResultToMutationFn.java              |   4 +-
 .../bigtable/beam/sequencefiles/Utils.java    |   6 +-
 .../src/main/resources/log4j.properties      |   4 +-
 .../test-snapshot/..snapshotinfo.crc          | Bin 0 -> 12 bytes
 .../test-snapshot/.data.manifest.crc          | Bin 0 -> 20 bytes
 .../test-snapshot/.snapshotinfo               |   2 +
 .../test-snapshot/data.manifest               | Bin 0 -> 1090 bytes
 .../cf/.b0f68aca966b48f1b171614e582b1cbb.crc  | Bin 0 -> 52 bytes
 .../cf/b0f68aca966b48f1b171614e582b1cbb       | Bin 0 -> 5264 bytes
 .../cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc  | Bin 0 -> 52 bytes
 .../cf/8aff180e3a244dcc807e4de8b6fce0a7       | Bin 0 -> 5264 bytes
 .../cf/.c2945aa8dac34922913a1f60fedb6154.crc  | Bin 0 -> 52 bytes
 .../cf/c2945aa8dac34922913a1f60fedb6154       | Bin 0 -> 5264 bytes
 .../cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc  | Bin 0 -> 52 bytes
 .../cf/cda93ca899f3475fb1c0f8989a8f0d18       | Bin 0 -> 5264 bytes
 .../cf/.d8b49b374391407ba35d5e0db1c835c9.crc  | Bin 0 -> 52 bytes
 .../cf/d8b49b374391407ba35d5e0db1c835c9       | Bin 0 -> 5299 bytes
 .../cf/.32053565831341128b8d8f5567d48fdc.crc  | Bin 0 -> 52 bytes
 .../cf/32053565831341128b8d8f5567d48fdc       | Bin 0 -> 5264 bytes
 .../cf/.36798a163ed046b193818e21dd7516b4.crc  | Bin 0 -> 52 bytes
 .../cf/36798a163ed046b193818e21dd7516b4       | Bin 0 -> 5264 bytes
 .../cf/.65b9c6860f5f4de39d61d1674947b030.crc  | Bin 0 -> 52 bytes
 .../cf/65b9c6860f5f4de39d61d1674947b030       | Bin 0 -> 5264 bytes
 .../cf/.b83044f76ba6474aa829e3bae7fd82d1.crc  | Bin 0 -> 52 bytes
 .../cf/b83044f76ba6474aa829e3bae7fd82d1       | Bin 0 -> 5264 bytes
 .../src/test/generate_test_data.txt           | 107 ++++++++++
 .../beam/hbasesnapshots/EndToEndIT.java       | 191 ++++++++++++++++++
 .../HBaseSnapshotInputConfigurationTest.java  |  51 +++++
 pom.xml                                       |   4 +-
 34 files changed, 690 insertions(+), 14 deletions(-)
 rename bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/{sequencefiles => }/Main.java (79%)
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfiguration.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/.b0f68aca966b48f1b171614e582b1cbb.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/01ef4b8bb8d79f360bf182fedfb1c0e8/cf/b0f68aca966b48f1b171614e582b1cbb
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/.8aff180e3a244dcc807e4de8b6fce0a7.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1a1358ba82be4a98feff54032986bbf2/cf/8aff180e3a244dcc807e4de8b6fce0a7
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/.c2945aa8dac34922913a1f60fedb6154.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/1bf20ce0551df953331936d20dbd18fa/cf/c2945aa8dac34922913a1f60fedb6154
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/cda93ca899f3475fb1c0f8989a8f0d18
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/.d8b49b374391407ba35d5e0db1c835c9.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/36798a163ed046b193818e21dd7516b4
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/generate_test_data.txt
 create mode 100644 bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
 create mode 100644
bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigurationTest.java diff --git a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml index 751ed4dd25..18a40fcae4 100644 --- a/bigtable-dataflow-parent/bigtable-beam-import/pom.xml +++ b/bigtable-dataflow-parent/bigtable-beam-import/pom.xml @@ -25,7 +25,8 @@ limitations under the License. bigtable-beam-import - com.google.cloud.bigtable.beam.sequencefiles.Main + com.google.cloud.bigtable.beam.Main + false @@ -61,14 +62,12 @@ limitations under the License. org.apache.beam - - beam-sdks-java-extensions-google-cloud-platform-core - + beam-sdks-java-io-hadoop-common ${beam.version} org.apache.beam - beam-sdks-java-io-hadoop-common + beam-sdks-java-io-hadoop-format ${beam.version} @@ -78,6 +77,21 @@ limitations under the License. ${hbase.version} + + + org.apache.hbase + hbase-shaded-server + ${hbase.version} + + + + + org.apache.hbase + hbase-common + ${hbase.version} + com.google.auto.value auto-value @@ -133,6 +147,13 @@ limitations under the License. slf4j-api ${slf4j.version} + + + com.google.cloud.bigdataoss + gcs-connector + hadoop2-2.1.4 + shaded + @@ -165,6 +186,12 @@ limitations under the License. ${junit.version} test + + org.apache.hbase + hbase-shaded-testing-util + ${hbase.version} + test + @@ -249,6 +276,16 @@ limitations under the License. + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + @@ -360,5 +397,33 @@ limitations under the License. + + + hbasesnapshotsIntegrationTest + + + + org.apache.maven.plugins + maven-failsafe-plugin + + + hbasesnapshots-integration-test + + integration-test + + integration-test + + 1 + + **/hbasesnapshots/*IT.java + + false + + + + + + + diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Main.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java similarity index 79% rename from bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Main.java rename to bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java index 2d5cc71a9e..52fee350d7 100644 --- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Main.java +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/Main.java @@ -13,10 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.google.cloud.bigtable.beam.sequencefiles; +package com.google.cloud.bigtable.beam; import com.google.bigtable.repackaged.com.google.api.core.InternalApi; import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly; +import com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot; +import com.google.cloud.bigtable.beam.sequencefiles.CreateTableHelper; +import com.google.cloud.bigtable.beam.sequencefiles.ExportJob; +import com.google.cloud.bigtable.beam.sequencefiles.ImportJob; import java.io.File; import java.net.URISyntaxException; import java.util.Arrays; @@ -43,6 +47,9 @@ public static void main(String[] args) throws Exception { case "import": ImportJob.main(subArgs); break; + case "importsnapshot": + ImportJobFromHbaseSnapshot.main(subArgs); + break; case "create-table": CreateTableHelper.main(subArgs); break; @@ -65,7 +72,7 @@ private static void usage() { System.out.printf( "java -jar %s \n" - + "Where can be 'export', 'import' or 'create-table'. To get further help, run: \n" + + "Where can be 'export', 'import' , 'importsnapshot' or 'create-table'. To get further help, run: \n" + "java -jar %s --help\n", jarName, jarName); } diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfiguration.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfiguration.java new file mode 100644 index 0000000000..2558b6a933 --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfiguration.java @@ -0,0 +1,95 @@ +/* + * Copyright 2017 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.google.cloud.bigtable.beam.hbasesnapshots;
+
+import com.google.common.base.Preconditions;
+import org.apache.beam.sdk.io.hadoop.SerializableConfiguration;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
+import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.util.Base64;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * A {@link Configuration} that can be used with {@code HadoopFormatIO} for reading an HBase
+ * snapshot hosted in a Google Cloud Storage (GCS) bucket via the GCS connector. It uses {@link
+ * TableSnapshotInputFormat} for reading HBase snapshots.
+ */
+class HBaseSnapshotInputConfiguration {
+
+  private static final Log LOG = LogFactory.getLog(HBaseSnapshotInputConfiguration.class);
+  private static final int BATCH_SIZE = 1000;
+
+  private final Configuration hbaseConf;
+
+  /**
+   * Builds the Hadoop {@link Configuration} used to read an HBase snapshot from GCS.
+   *
+   * @param gcsProjectId The GCP project that owns the GCS bucket holding the exported snapshot.
+   * @param snapshotDir The GCS path of the exported HBase root dir.
+   * @param snapshotName The name of the snapshot to read.
+   * @param restoreDir The GCS path used as a temp dir to restore the snapshot into.
+   */
+  HBaseSnapshotInputConfiguration(
+      ValueProvider<String> gcsProjectId,
+      ValueProvider<String> snapshotDir,
+      ValueProvider<String> snapshotName,
+      ValueProvider<String> restoreDir) {
+
+    Preconditions.checkArgument(
+        snapshotDir.get().startsWith("gs://"),
+        "snapshot folder must be hosted in a GCS bucket");
+
+    Configuration conf = HBaseConfiguration.create();
+    try {
+      conf.set("hbase.rootdir", snapshotDir.get());
+      conf.set("fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS");
+      conf.set("fs.gs.project.id", gcsProjectId.get());
+      conf.set("fs.defaultFS", snapshotDir.get());
+      conf.set("google.cloud.auth.service.account.enable", "true");
+      conf.setClass(
+          "mapreduce.job.inputformat.class", TableSnapshotInputFormat.class, InputFormat.class);
+      conf.setClass("key.class", ImmutableBytesWritable.class, Writable.class);
+      conf.setClass("value.class", Result.class, Object.class);
+      // Serialize a Scan with a bounded batch size into the config, as TableInputFormat expects.
+      ClientProtos.Scan proto = ProtobufUtil.toScan(new Scan().setBatch(BATCH_SIZE));
+      conf.set(TableInputFormat.SCAN, Base64.encodeBytes(proto.toByteArray()));
+
+      LOG.debug(conf);
+      Job job = Job.getInstance(conf); // creates internal clone of hbaseConf
+      TableSnapshotInputFormat.setInput(job, snapshotName.get(), new Path(restoreDir.get()));
+      conf = job.getConfiguration(); // extract the modified clone
+    } catch (Exception e) {
+      LOG.fatal(e);
+      throw new RuntimeException(e);
+    }
+    this.hbaseConf = new SerializableConfiguration(conf).get();
+  }
+
+  public Configuration getHbaseConf() {
+    return hbaseConf;
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java
new file mode 100644
index 0000000000..40925535ab
--- /dev/null
+++ 
b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/hbasesnapshots/ImportJobFromHbaseSnapshot.java @@ -0,0 +1,152 @@ +/* + * Copyright 2017 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.beam.hbasesnapshots; + +import com.google.bigtable.repackaged.com.google.api.core.InternalExtensionOnly; +import com.google.cloud.bigtable.beam.CloudBigtableIO; +import com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration; +import com.google.cloud.bigtable.beam.TemplateUtils; +import com.google.cloud.bigtable.beam.sequencefiles.HBaseResultToMutationFn; +import com.google.cloud.bigtable.beam.sequencefiles.ImportJob; +import com.google.cloud.bigtable.beam.sequencefiles.Utils; +import com.google.common.annotations.VisibleForTesting; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.PipelineResult; +import org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PDone; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.client.Mutation; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; + +/** + * A job that imports data from HBase snapshot exports hosted in Cloud Storage bucket into Cloud + * Bigtable. This job can be run directly or as a Dataflow template. + * + *

+ * <p>Execute the following command to run the job directly:
+ *
+ * <pre>
+ * mvn compile exec:java \
+ *   -DmainClass=com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot \
+ *   -Dexec.args="--runner=DataflowRunner \
+ *                --stagingLocation=gs://$STAGING_PATH \
+ *                --project=$PROJECT \
+ *                --bigtableInstanceId=$INSTANCE \
+ *                --bigtableTableId=$TABLE \
+ *                --hbaseRootDir=gs://$HBASE_ROOT_PATH \
+ *                --snapshotName=$SNAPSHOT_NAME  \
+ *                --restoreDir=gs://$RESTORE_PATH
+ * </pre>
+ *
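+ * <p>The same job is also reachable through the new {@code importsnapshot} command of the shaded
+ * import/export jar (a sketch; the exact jar file name depends on the build):
+ *
+ * <pre>
+ * java -jar bigtable-beam-import-VERSION-shaded.jar importsnapshot [same pipeline args as above]
+ * </pre>
+ *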

+ * <p>Execute the following command to create the Dataflow template:
+ *
+ * <pre>
+ * mvn compile exec:java \
+ *   -DmainClass=com.google.cloud.bigtable.beam.hbasesnapshots.ImportJobFromHbaseSnapshot \
+ *   -Dexec.args="--runner=DataflowRunner \
+ *                --project=$PROJECT \
+ *                --stagingLocation=gs://$STAGING_PATH \
+ *                --templateLocation=gs://$TEMPLATE_PATH \
+ *                --wait=false"
+ * </pre>
+ *

+ * <p>There are a few ways to run the pipeline using the template. See the Dataflow docs for
+ * details: https://cloud.google.com/dataflow/docs/templates/executing-templates. Optionally, you
+ * can upload a metadata file that contains information about the runtime parameters, which can be
+ * used for parameter validation and more. A sample metadata file can be found at
+ * "src/main/resources/ImportJob_metadata".
+ *

+ * <p>An example using the gcloud command line:
+ *
+ * <pre>
+ * gcloud beta dataflow jobs run $JOB_NAME \
+ *   --gcs-location gs://$TEMPLATE_PATH \
+ *   --parameters bigtableProject=$PROJECT,bigtableInstanceId=$INSTANCE,bigtableTableId=$TABLE,hbaseRootDir=gs://$HBASE_ROOT_PATH,snapshotName=$SNAPSHOT_NAME,restoreDir=gs://$RESTORE_PATH
+ * </pre>
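+ *
+ * <p>For tests or embedding, the pipeline can also be built and run in-process, as the
+ * integration test does. A minimal sketch (project, instance, table, paths and snapshot name
+ * below are placeholders):
+ *
+ * <pre>
+ * ImportOptions opts = PipelineOptionsFactory.as(ImportOptions.class);
+ * opts.setBigtableProject(StaticValueProvider.of("my-project"));
+ * opts.setBigtableInstanceId(StaticValueProvider.of("my-instance"));
+ * opts.setBigtableTableId(StaticValueProvider.of("my-table"));
+ * opts.setHbaseRootDir(StaticValueProvider.of("gs://my-bucket/hbase-export"));
+ * opts.setSnapshotName(StaticValueProvider.of("my-snapshot"));
+ * opts.setRestoreDir(StaticValueProvider.of("gs://my-bucket/restore"));
+ * ImportJobFromHbaseSnapshot.buildPipeline(opts).run().waitUntilFinish();
+ * </pre>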
+ */
+@InternalExtensionOnly
+public class ImportJobFromHbaseSnapshot {
+  private static final Log LOG = LogFactory.getLog(ImportJobFromHbaseSnapshot.class);
+
+  public interface ImportOptions extends ImportJob.ImportOptions {
+    @Description("The HBase root dir where the HBase snapshot files reside.")
+    ValueProvider<String> getHbaseRootDir();
+
+    @SuppressWarnings("unused")
+    void setHbaseRootDir(ValueProvider<String> hbaseRootDir);
+
+    @Description("Temp location for restoring snapshots")
+    ValueProvider<String> getRestoreDir();
+
+    @SuppressWarnings("unused")
+    void setRestoreDir(ValueProvider<String> restoreDir);
+
+    @Description("Snapshot name")
+    ValueProvider<String> getSnapshotName();
+
+    @SuppressWarnings("unused")
+    void setSnapshotName(ValueProvider<String> snapshotName);
+  }
+
+  public static void main(String[] args) {
+    PipelineOptionsFactory.register(ImportOptions.class);
+
+    ImportOptions opts =
+        PipelineOptionsFactory.fromArgs(args).withValidation().as(ImportOptions.class);
+
+    LOG.info("Building pipeline");
+    Pipeline pipeline = buildPipeline(opts);
+
+    LOG.info("Running pipeline");
+    PipelineResult result = pipeline.run();
+
+    if (opts.getWait()) {
+      Utils.waitForPipelineToFinish(result);
+    }
+  }
+
+  @VisibleForTesting
+  static Pipeline buildPipeline(ImportOptions opts) {
+    Pipeline pipeline = Pipeline.create(Utils.tweakOptions(opts));
+    pipeline
+        .apply(
+            "Read from HBase Snapshot",
+            HadoopFormatIO.<ImmutableBytesWritable, Result>read()
+                .withConfiguration(
+                    new HBaseSnapshotInputConfiguration(
+                            opts.getBigtableProject(),
+                            opts.getHbaseRootDir(),
+                            opts.getSnapshotName(),
+                            opts.getRestoreDir())
+                        .getHbaseConf()))
+        .apply("Create Mutations", ParDo.of(new HBaseResultToMutationFn()))
+        .apply("Write to Bigtable", createSink(opts));
+
+    return pipeline;
+  }
+
+  static PTransform<PCollection<Mutation>, PDone> createSink(ImportOptions opts) {
+    CloudBigtableTableConfiguration config = TemplateUtils.BuildImportConfig(opts);
+    return CloudBigtableIO.writeToTable(config);
+  }
+}
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java
index b4b3862817..4c794ed7eb 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/CreateTableHelper.java
@@ -57,7 +57,7 @@
  * intended to be a preparation step before running an {@link ImportJob}.
  */
 @InternalApi
-class CreateTableHelper {
+public class CreateTableHelper {
   private static final Log LOG = LogFactory.getLog(CreateTableHelper.class);
 
   @InternalApi
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java
index 6b2e628a5d..45954c7762 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/HBaseResultToMutationFn.java
@@ -15,6 +15,7 @@
  */
 package com.google.cloud.bigtable.beam.sequencefiles;
 
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Function;
 import com.google.common.base.Predicate;
@@ -43,7 +44,8 @@
  * A {@link DoFn} function that converts a {@link Result} in the pipeline input to a {@link
  * Mutation} for output.
  */
-class HBaseResultToMutationFn extends DoFn<KV<ImmutableBytesWritable, Result>, Mutation> {
+@InternalApi
+public class HBaseResultToMutationFn extends DoFn<KV<ImmutableBytesWritable, Result>, Mutation> {
 
   private static Logger logger = LoggerFactory.getLogger(HBaseResultToMutationFn.class);
   private static final long serialVersionUID = 1L;
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java
index 142c4a17ef..27e8f3debf 100644
--- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/sequencefiles/Utils.java
@@ -15,6 +15,7 @@
  */
 package com.google.cloud.bigtable.beam.sequencefiles;
 
+import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
 import org.apache.beam.runners.dataflow.DataflowRunner;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
 import org.apache.beam.sdk.PipelineResult;
@@ -28,7 +29,8 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
-class Utils {
+@InternalApi
+public class Utils {
   private static final Log LOG = LogFactory.getLog(Utils.class);
 
   /**
@@ -74,7 +76,7 @@ public ResourceId apply(String input) {
    *
    * @param result
    */
-  static void waitForPipelineToFinish(PipelineResult result) {
+  public static void waitForPipelineToFinish(PipelineResult result) {
     try {
       // Check to see if we are creating a template.
       // This should throw {@link UnsupportedOperationException} when creating a template.
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/main/resources/log4j.properties b/bigtable-dataflow-parent/bigtable-beam-import/src/main/resources/log4j.properties index 04f0fab6bb..451fb6c51f 100644 --- a/bigtable-dataflow-parent/bigtable-beam-import/src/main/resources/log4j.properties +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/main/resources/log4j.properties @@ -21,4 +21,6 @@ log4j.appender.stdout.Target=System.out log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n -log4j.category.org.apache.hadoop.io.compress.CodecPool=WARN +log4j.category.org.apache.hadoop.io.compress.CodecPool=TRACE +log4j.logger.org.apache.hadoop.io.compress.Compression=TRACE +log4j.logger.org.apache.hadoop.util.NativeCodeLoader=DEBUG diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/..snapshotinfo.crc new file mode 100644 index 0000000000000000000000000000000000000000..8fe4533a0159f76b5bb3a1968ac5d1fa7fc45a58 GIT binary patch literal 12 TcmYc;N@ieSU}AWGHv9?z6(s~B literal 0 HcmV?d00001 diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.data.manifest.crc new file mode 100644 index 0000000000000000000000000000000000000000..1467a17f1f9924f6a69bd2963d5e21ff088ca3f6 GIT binary patch literal 20 bcmYc;N@ieSU}8|8vgZ5;2Nu4voskOwJbDJ( literal 0 HcmV?d00001 diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo new file mode 100644 index 0000000000..83e482aac0 --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/.snapshotinfo @@ -0,0 +1,2 @@ + + test-snapshottestÙ÷¤×ä. ( \ No newline at end of file diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/.hbase-snapshot/test-snapshot/data.manifest new file mode 100644 index 0000000000000000000000000000000000000000..180516dc03055633111dc6316e9e50e08c8196ba GIT binary patch literal 1090 zcmaiz&2H2%6on`K5yb^TT?tiXg91pPO304?u9ri`g{YayD2YI@8QHPD>Wa4S!HOr~ z4cPMb!XD$;cVJZR|#C6t<0iqX4T>i-dQvYgAdSl-`5xwlq(KQ za)l4ke%Z{It?903{;j(=PhydBE&jE|vADh-xO7Chn7QO$rtD5&>JQ0e$asLC@KLmi0{>|nJ_ z1c=m8MJ44yK#@c{cMWav)BoGo`DDxl8zTi0m@PQ>!vI7D?gKa(hd`{_C{*T|8bgOd zb73ubJ20+`LQ`gGlq3f4Bo%A-C$|Wlr#t6V`BxZAc6@gfKkRNEx>_? zD1=l#CFh5YwrHLeW2gtmwPT7iO*v7{GT}uaK7|iV4EpHXhL^(gOpKup7~woB>1_HY sJf&9Xz?=;#dMG^Is1)){kD*dHJxEB2iiZN{4DK6}G>vmhg%ep|Z7_g)uhQttp zF)QTz&CdJ0d2hb+W|Oih$JMDb z2uaIei@(`$AvBI);yyflU=2u$xaJmYm|CCw6#L>?x0h}fA|7!0!5Wb(60Z5jZZm)Q z2nP&sz!n1r8aQB(1MV%HV7QqBMmS)H0V9|LMmgXC14dgo zfRkbDAp;_<91!JJc+7xk8wbQVV2=SY?-SpLA8}Pn!D$Qv90c>#odz2C_WGl}^}IE? 
z{jBp*+57p|(|6xzmE^Rc>n08A0qd#{x&V`GNV~rJb3C`X@snGD9%vOlU`>E}zIJWO zDMckpIt6K@eXuumX{7g=crNwQjXSVEuIjpGL4ZjN%bdxOfZNb7_vZkmrRzlCE%>X^!%D^Y^Q6d|vLb9fn$(fd!rB0_E z@p0^jrL4TH=7^OiMix>8=w>7w#9mc~7^fbpz7;Es6RhL@UwcX-gwo z*3=CGi7I7QEAiq^?4>}KT(XqpA)gPQ#m%Oo5kpQ-kU)2}KIc8Q=WM0+q;(e;7MBVk zIX^*HvQ*;K?SuWc*IdrVs?G-7jf0LeVGtQKqd7T6&L6|QxXmHx=IPG-a7Pcf-u9{a zC~Y~7RJ!K|)OEe?SkVqlx>>>WM?kNm#J=gZpo2EIG*l{I3)w67T(J-;+ViE-N~kz% zuNFd6g^fJ@S@aq9t@y@TVKq@&Ua`yeTB%%#M$#_4N2JKB$z<3x=>Kffx$)rhzr0T2jWcvFeCu`92jaAfMEf6%Yoq*0T>a00}hN30T>m4Lk^6# z3c#2EyyL*wDFGN4fcG32ZxaAP$cZBkD8ViD_+vsh(y$wGfTLicy4^s74efnTzJ8i) zwhr;r&+fBtU)py@wbYztnl=mS0qd#{nuJL{@NTGnAAQFQdEpOiL#yxsZvxcwv+I$X z)^w_6(vSwf276hT27kyV^63|T(n-Pz-82mc0&HqI_I#E`{FZr>oF~zwX=K%;t(h5z z7HGzHX6bw+z#_>7(vhM%wW*U<4NaeSl4d?*N7(LG202ICscPvO&Dl;iZ##C5IlXoy zXGj=+%wkEv5Ekb(a>GmWwg9 zFwOqt=+vt_1ncdnxx9r{osFcML_KHHqAF;{atexEyGVLTn@6z4vz3M6jGk=0AN2A* zYdM2fy5|AZb-!*~(PNnO^Mct$z>Xv4-|Seh!I~QnmDldY+*Nm>RE(9}yXEp~taQg+ zFUDqz+XZ%6^jYp!Vtb>wo-D7dx@+!6d94zS@GiVZu!7^iPTLl{j(YpF=I4RXfxok> zd#}GA51{eZDSRDe(XH4&(?GsjhTG7?a!3WF0#X5~fK)&#AQg}bNCl(pnJn$(xZ z5mG8Jw@&!*AT*C);R!qfFguVE_pEO^uynZiE%C#5=f zFeB`+&w-h<0x&B8ha8yg6@Y{Q)Hsmvuk}6qoYajBoW>-;VVHro(?AnD;}4GNS?}XV zFS=fSeg5HO<6ZrWmR`0@(`HV?FbCV6uEHW8NZWN>aIlF*?Uft}c`fq>zKFvq)5xhQTQjqi z6iL>mOJpVFW1jdD?n@J$*o0XN$#88*1L5#d@ zS_A?$(rjkp#}}}l0eNzrYUxw90KSa7ZOtH-nq45Fp=N&Wd#>h|(t5JI%bQy}rHEQw zU^`KrxOoSl-oBQ}Ev#wmz(Y9f8dDZgK{FOpP~=JlkKkUHV8ye_f^bJqx8Co8qmQiR z4ASVHJCN6Q+p(gju;}FpvmXJwjtc)~*Mbe!+)AvvcRS+jI_u?9r0i@|tGki%nzLVu zER||S_OlqZocqbzL1{l#t?W8`&Ovpr;f<6!yr-2o9ey9PE%rMa8P&Rue1T(c|KQNu zu`wlzW_#w9Ybb|qM*bNF;=kP1izqyka_slb1w GfcFm(gDz_T literal 0 HcmV?d00001 diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/2c25a1cedf575cd08267e0013e45872e/cf/.cda93ca899f3475fb1c0f8989a8f0d18.crc new file mode 100644 index 0000000000000000000000000000000000000000..931ebfb54555d336879fa44ef956de26ba9c2a4e GIT binary patch literal 52 mcmYc;N@ieSU}9MD$S>Yy!SuIZFSEYew5qfM7k&ilN1?|tXZW;Cj$jNEmE zlx>)szxnVXG>>562p%fTHl)OT>l-dC9W0(Dz6B24tw)u308Am6?Z_7&Z`zY{+&+Fm z044-ro&ytY0uU2`3{zFp&YBN=lK0*dQ8}X9cTkS;Z1;sehoa;vbsU^ zd=|ne?_s`e38Q=}q)OS>e%y;gDZ{c%2OKD|9lBZ|VZUuH<109lw#WI2qD6(7zIag(9pog%Zdf`#S`EVF||d@hA@9qVm1Y_OiXRXF^O5EmQ5f~ zJZbJ{tAr+7cNCl(MA=N`FDt4b?y;j9U&TL27c7ajlr literal 0 HcmV?d00001 diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3264826a5972b18c5a59b2f612678316/cf/d8b49b374391407ba35d5e0db1c835c9 new file mode 100644 index 0000000000000000000000000000000000000000..d640fc8498e06935ecaf06a2714dda361af9ed7e GIT binary patch literal 5299 zcmeH{&rcIU6vt=V%8vp1W2p)LK!I=|M4K*9pkNH!c8UwT-MZaY@UpI0tOjib#1Q`k zjlrWaG2!5aXd)i`FFdGc5>F&vh(_=FraQGmFJ3*&ByTr6@AKxp`Oe$PsJ5EY7e^72 zcEFzgX2XZj9D<3r@CbvoAxZJAthz9@weT&uAJ}r&9+s5=xFTTf$fv}c{+ST-$4`J$ z2J1k6CC(AEpBqFx#I3>Qn29n%Y2$z~E=eB)#@abxoCAg#Fy6rd6C5zYfQe2HNN~Uf z1|%>COmcw2fXOZnnBss01E#t;V44G-Ghn)h1Cku@f&t0k&)~CjDc#7zfsFwi1q-(v z7*doYxAW-Nr>`r4(FgB#H0^QnRaMI@S*B^zph2+y)}YHU$%eEaZawO*?>}nX3iQky z@FD9HG#K37+skS?(Q;WxBYgmS-;zf9l1~-0Z~V9qM^d_J8V&^5#B%KAJdypDc@3Y% z(X?sg)wHddIfs-;&UO~bvK*k1_&n~-5S`e>$*P8?FFR?on6qViy7ht2;1E$QT_XkC z$ro+ME>Ne}vG^2@z*0_0)r-U_5vu?xWV#s%M{!VBA;znUCiE#u2#6Z$f@wjTza>$h 
zglHv(y5tzdDA=Y&AW^-{MkOJ98V4y*AXgnNbIcdP7jdVp8N^a^^F$tK)aSitKYLqg zK55;R)jMnDm|B{rD>*vx>W;vEdz&tAV-06J9>7u0nYM@un$et`B9~k|gnK-KZl3Ne z0>AO`)_Z;MGDKU>AocFK4|UybIac%pCjG2ndRNfjQDWb;>*=7)Esa;#Z^zu4yHY8~ zD(8xj@73IOceA=)k4Dlyyc3clODA`2i{4m6 z!&=A3K={z#*FSJDG9ty%boZQe1?AE8*gw-izGj9S(8FQ~1%v`Z0il3UKqw#-5DEwd SgaSeVp@2|8DDYn?;Qs^s0WsnL literal 0 HcmV?d00001 diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/.32053565831341128b8d8f5567d48fdc.crc new file mode 100644 index 0000000000000000000000000000000000000000..80317a1515597ecbac0015cf7edba1283ce6824b GIT binary patch literal 52 mcmYc;N@ieSU}9J$_&t8|r`=f(wzKMOT2)$s3%_IzQ3C*MFc~EP literal 0 HcmV?d00001 diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/3d397f3b97e7fd2358fb5c93060b3a60/cf/32053565831341128b8d8f5567d48fdc new file mode 100644 index 0000000000000000000000000000000000000000..5320c6c58dbe6e391c4994185de1655b0e874b09 GIT binary patch literal 5264 zcmeH{&rcIU6vt=VT40dcmYSeK1i}H0G|=*^#1N+K6jye;b-S%#Ox6{O#Q<#pF~r1+ z@uCT5P4wcyM57lE95694$RA+5dN%QDJb2SL-KiaV@#< zokvJn16%pchXub%rP zqiR&mW+08S2llQhjq)XzEM(qzaTg9HHN((t2(YMWTMIcF_L|03+>IkCL(h?vr5ahA z7HQV9=jcM%$0G4r+?l2twWysTx~eVMDWj0J!t8Ww1E0YsC^0pa<}Eu{uxu;OoNhbf zQ#b@m*+rris9mIH9#VwaW)vL30gXV6TNO*_i6tN)qLa&p32FY8#C!^3mFQ&N)~TMi z43k2lTAB4q0{AozFd$E_*lPNSFNlY5o2Ba1B-vRS?ycA7zNZe|tu&sj?(*`=YAH&J zvuq_>qi)?n*l%aUGY#ZxWViu6EQeG;Dj*e*3P=T{0#X5~fK)&#AQg}bNCl(< I|CIvXKg{tgK>z>% literal 0 HcmV?d00001 diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/7466202f701dc0e3af8cc747c9a37ec8/cf/.36798a163ed046b193818e21dd7516b4.crc new file mode 100644 index 0000000000000000000000000000000000000000..00a9d7720d3d3867ea0cd0a153d5265158a9b5ff GIT binary patch literal 52 mcmYc;N@ieSU}89xHtAGe`OGB@e!ed@ttzd+g?$pMob@YMq*J2@c10Y@H?2v%M_jwf{^1E(T89t3CP4i^ zWB1gIrV}ljfi%(!u;)!_q{CdYka_0E-8h`oP1CR;fD+55r5uU)E%O>ai=!#i$f+r+ znOU0@NtW94q!bCTNZgCN(nKee*csK(^pc%23t1Xrr&}BN3_e9vOV>!A+PMO?X`VUV zcEmk63`^NXRWA^`NUS`hh_KB_IEsV13Nda~ETKy-rYX&hugo?NxH^f6xuU&QTHGl->TXGx^5UZ4A(I&!ztc(S@HODk(- zSuM`8m2923b%$WTT@9DJvAVMb_u;7POj$$)%~(!Ok;|<(jyqj~ZJzBc40rT+>&4~e z6l*zy)Vk*m)V11ltmqL;`n`hLkAPiA$@^y4f(_Q(l2YAVmYoe}u~L>R&h2V-L$2I% zw#xE+d9TQR76X>Eo7~$jZ>6g18_uS)UEQojBWWMrW0JD^{_qmD*zYJlptU^>gbw`O zvA&msgVGS1?3j_Rpd7j`|1%BbYh<_yJuHS$Kqw#-5DEwdgaSeVp@2|8C?FIN3J3*+ J0{@i){y%HhEbjmS literal 0 HcmV?d00001 diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/.65b9c6860f5f4de39d61d1674947b030.crc new file mode 100644 index 0000000000000000000000000000000000000000..1d7e3d8653bfae2874b0d726f295edbbd3e92fa8 GIT binary patch literal 52 lcmYc;N@ieSU}CVbo1{1?#CqoE%2>fot4b?y;g`%IY5*h+7hwPZ literal 0 HcmV?d00001 diff --git 
a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/958c660f0e406404ffdfc81110e7eaf9/cf/65b9c6860f5f4de39d61d1674947b030 new file mode 100644 index 0000000000000000000000000000000000000000..e8d9789f5e9c26ecc1eed6ed9e7061b6e0a9d461 GIT binary patch literal 5264 zcmeH{&rcIU6vt=V@@o)mOAq*i@FN_E!G_-~Vhr1MiVM5lx}8?)WnHmY3|It2iHV6< zO*nWlF=|ZIXvCP{!2^jWPR4lhU-05pW5P|}bfD`P6}`fAS&s z57Y_3pa4ALz+k-q3<$(9PkD4N!&zrlh|5nnuNP5^?C2f9d9euC#$=-u((u+ zYPo5)l4B6B?jY>9t?KeNR&mzjE*$ZkahqtM8Otdsa(V>!;1-Wyn`b)`w_6~sPJ!gE!beqtqhk|uSDGycfMGN7TwFG(n_>= z$z3Z%XA7G-_Os}<-J7w^^}<@bw7lZ3y6dIYax_x5;619SKl8UD)Mme= LkP7@)3i$s3LhCH5 literal 0 HcmV?d00001 diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/.b83044f76ba6474aa829e3bae7fd82d1.crc new file mode 100644 index 0000000000000000000000000000000000000000..ca57c97e2deddae20f6c82712db07fd2e35620d7 GIT binary patch literal 52 lcmYc;N@ieSU}Esk=Ghmve-7`}t(p;=R+U!Z!Y`Ra)BrhK7wrH5 literal 0 HcmV?d00001 diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 b/bigtable-dataflow-parent/bigtable-beam-import/src/test/data/archive/data/default/test/dab1d611586e861818af77de74073d47/cf/b83044f76ba6474aa829e3bae7fd82d1 new file mode 100644 index 0000000000000000000000000000000000000000..c119dd13ef4179dcde442da0461e0f1b50569ba1 GIT binary patch literal 5264 zcmeH{&rcIU6vt=V%8x-xTY5kgL4G8}gbjuAqhJivc8UwT-MXDt@Zh>)p%}0P5JO^2 zjPc-)5Iyq;m>5qUGAD`@juXbF!5kCF~OPc)DFFP^)QpX-R!*2oA>5BZ#Ibt zitDo{0Z>+{E${Q;0bmNyiF@=BpjHQzsApl(p;KEkUt*tpTh7Y0LexiHL2C8D6CJGj zCq8rk;2{Bs2*6hkMCt@!NC3WZV5nXIh6Ug|2ZkF2K&Wxz2M3~!0x%-1@QVW@Pyj{+ z;5P?Gn*?A?0QNaBc31$$tH7j}1LMsC5EFnN4#fN|FOT1i>qd&6#t?y(L-o0@<(z%pT`}VZtoMoDX1)ZSQQ5$rcPV%Ab1ZvOm!kyB4VFh-e6?l&~ z0XXrq<0eXJI@Z!Dnnrm}?O9D4`ac> zB+Hy`JHjI{NSCtnNY7zAkF6|C5n`KBU>N#!L}T2lSVB)UMgzhInlUY!=5I;NrvO%o zf#z%j8(Csnm?kQhS*gSi+o7KUS$x*kk_UVNH~<@oW?&1Yr*WvOQlI;t>~XhJeX_cX zi_0qo73HVdO16&Ox&w5-tyP!1v5K<}cEPagOjsCE%~(!Bky8kE!)BLYn`b)<(mQ&v z^>+IvZ?Tp$aJhS~(z>qH94oj_C%wF2_9I}|QQ_b0TCl;ITNy5`FR9L&vrsIkMdxy< zw5Ap>IU5Cawy>RNKZ{<=xgOu%ENmo7t831>vsqd%M+0StzDJbk>-HCKiN$_L-Mw1F z6JKD@+t$(bs=r?u1mjIp${COW=hc6vq4}yAF47*BLn ["1", "2", "3", "4", "5", "6", "7", "8", "9"]} +put 'test','1', 'cf:a', 'value1' +put 'test','2', 'cf:a', 'value2' +put 'test','3', 'cf:a', 'value3' +put 'test','4', 'cf:a', 'value4' +put 'test','5', 'cf:a', 'value5' +put 'test','6', 'cf:a', 'value6' +put 'test','7', 'cf:a', 'value7' +put 'test','8', 'cf:a', 'value8' +put 'test','9', 'cf:a', 'value9' +put 'test','10', 'cf:a', 'value10' +put 'test','11', 'cf:a', 'value11' +put 'test','12', 'cf:a', 'value12' +put 'test','13', 'cf:a', 'value13' +put 'test','14', 'cf:a', 'value14' +put 'test','15', 'cf:a', 'value15' +put 'test','16', 'cf:a', 'value16' +put 'test','17', 'cf:a', 'value17' +put 'test','18', 'cf:a', 'value18' +put 'test','19', 'cf:a', 'value19' +put 'test','20', 'cf:a', 'value20' +put 'test','21', 'cf:a', 'value21' +put 'test','22', 'cf:a', 'value22' +put 'test','23', 'cf:a', 'value23' +put 'test','24', 'cf:a', 'value24' +put 'test','25', 'cf:a', 'value25' +put 
'test','26', 'cf:a', 'value26'
+put 'test','27', 'cf:a', 'value27'
+put 'test','28', 'cf:a', 'value28'
+put 'test','29', 'cf:a', 'value29'
+put 'test','30', 'cf:a', 'value30'
+put 'test','31', 'cf:a', 'value31'
+put 'test','32', 'cf:a', 'value32'
+put 'test','33', 'cf:a', 'value33'
+put 'test','34', 'cf:a', 'value34'
+put 'test','35', 'cf:a', 'value35'
+put 'test','36', 'cf:a', 'value36'
+put 'test','37', 'cf:a', 'value37'
+put 'test','38', 'cf:a', 'value38'
+put 'test','39', 'cf:a', 'value39'
+put 'test','40', 'cf:a', 'value40'
+put 'test','41', 'cf:a', 'value41'
+put 'test','42', 'cf:a', 'value42'
+put 'test','43', 'cf:a', 'value43'
+put 'test','44', 'cf:a', 'value44'
+put 'test','45', 'cf:a', 'value45'
+put 'test','46', 'cf:a', 'value46'
+put 'test','47', 'cf:a', 'value47'
+put 'test','48', 'cf:a', 'value48'
+put 'test','49', 'cf:a', 'value49'
+put 'test','50', 'cf:a', 'value50'
+put 'test','51', 'cf:a', 'value51'
+put 'test','52', 'cf:a', 'value52'
+put 'test','53', 'cf:a', 'value53'
+put 'test','54', 'cf:a', 'value54'
+put 'test','55', 'cf:a', 'value55'
+put 'test','56', 'cf:a', 'value56'
+put 'test','57', 'cf:a', 'value57'
+put 'test','58', 'cf:a', 'value58'
+put 'test','59', 'cf:a', 'value59'
+put 'test','60', 'cf:a', 'value60'
+put 'test','61', 'cf:a', 'value61'
+put 'test','62', 'cf:a', 'value62'
+put 'test','63', 'cf:a', 'value63'
+put 'test','64', 'cf:a', 'value64'
+put 'test','65', 'cf:a', 'value65'
+put 'test','66', 'cf:a', 'value66'
+put 'test','67', 'cf:a', 'value67'
+put 'test','68', 'cf:a', 'value68'
+put 'test','69', 'cf:a', 'value69'
+put 'test','70', 'cf:a', 'value70'
+put 'test','71', 'cf:a', 'value71'
+put 'test','72', 'cf:a', 'value72'
+put 'test','73', 'cf:a', 'value73'
+put 'test','74', 'cf:a', 'value74'
+put 'test','75', 'cf:a', 'value75'
+put 'test','76', 'cf:a', 'value76'
+put 'test','77', 'cf:a', 'value77'
+put 'test','78', 'cf:a', 'value78'
+put 'test','79', 'cf:a', 'value79'
+put 'test','80', 'cf:a', 'value80'
+put 'test','81', 'cf:a', 'value81'
+put 'test','82', 'cf:a', 'value82'
+put 'test','83', 'cf:a', 'value83'
+put 'test','84', 'cf:a', 'value84'
+put 'test','85', 'cf:a', 'value85'
+put 'test','86', 'cf:a', 'value86'
+put 'test','87', 'cf:a', 'value87'
+put 'test','88', 'cf:a', 'value88'
+put 'test','89', 'cf:a', 'value89'
+put 'test','90', 'cf:a', 'value90'
+put 'test','91', 'cf:a', 'value91'
+put 'test','92', 'cf:a', 'value92'
+put 'test','93', 'cf:a', 'value93'
+put 'test','94', 'cf:a', 'value94'
+put 'test','95', 'cf:a', 'value95'
+put 'test','96', 'cf:a', 'value96'
+put 'test','97', 'cf:a', 'value97'
+put 'test','98', 'cf:a', 'value98'
+put 'test','99', 'cf:a', 'value99'
+put 'test','100', 'cf:a', 'value100'
+snapshot 'test', 'test-snapshot'
+list_snapshots
+
+disable 'test'
+drop 'test'
+exit
diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
new file mode 100644
index 0000000000..6205842fef
--- /dev/null
+++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/EndToEndIT.java
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2020 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.beam.hbasesnapshots; + +import static com.google.common.base.Preconditions.checkNotNull; + +import com.google.cloud.bigtable.beam.sequencefiles.testing.BigtableTableUtils; +import com.google.cloud.bigtable.hbase.BigtableConfiguration; +import com.google.cloud.bigtable.hbase.BigtableOptionsFactory; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; +import org.apache.beam.runners.direct.DirectRunner; +import org.apache.beam.sdk.PipelineResult.State; +import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; +import org.apache.beam.sdk.extensions.gcp.util.GcsUtil; +import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/* + * End to end integration test for pipeline that import HBase snapshot data into Cloud Bigtable. + * Prepare test data with gsutil(https://cloud.google.com/storage/docs/quickstart-gsutil): + * gsutil -m cp -r /bigtable-dataflow-parent/bigtable-beam-import/src/test/data/ \ + * gs:///integration-test/ + * + * Setup GCP credential: https://cloud.google.com/docs/authentication + * Ensure your credential have access to Bigtable and Dataflow + * + * Run with: + * mvn integration-test -PhbasesnapshotsIntegrationTest \ + * -Dgoogle.bigtable.project.id= \ + * -Dgoogle.bigtable.instance.id= \ + * -Dgoogle.dataflow.stagingLocation=gs:///staging \ + * -Dcloud.test.data.folder=gs:///integration-test/ + */ +public class EndToEndIT { + + private static final String TEST_SNAPSHOT_NAME = "test-snapshot"; + // Location of test data hosted on Google Cloud Storage, for on-cloud dataflow tests. + private static final String CLOUD_TEST_DATA_FOLDER = "cloud.test.data.folder"; + + // Column family name used in all test bigtables. + private static final String CF = "cf"; + + // Full path of the Cloud Storage folder where dataflow jars are uploaded to. 
+ private static final String GOOGLE_DATAFLOW_STAGING_LOCATION = "google.dataflow.stagingLocation"; + + private Connection connection; + private String projectId; + private String instanceId; + private String tableId; + + private GcsUtil gcsUtil; + private String dataflowStagingLocation; + private String workDir; + private byte[][] keySplits; + + // Snapshot data setup + private String hbaseSnapshotDir; + private String restoreDir; + + @Before + public void setup() throws Exception { + projectId = getTestProperty(BigtableOptionsFactory.PROJECT_ID_KEY); + instanceId = getTestProperty(BigtableOptionsFactory.INSTANCE_ID_KEY); + dataflowStagingLocation = getTestProperty(GOOGLE_DATAFLOW_STAGING_LOCATION); + String cloudTestDataFolder = getTestProperty(CLOUD_TEST_DATA_FOLDER); + if (!cloudTestDataFolder.endsWith(File.separator)) { + cloudTestDataFolder = cloudTestDataFolder + File.separator; + } + + hbaseSnapshotDir = cloudTestDataFolder + "data/"; + UUID test_uuid = UUID.randomUUID(); + restoreDir = cloudTestDataFolder + "restore/" + test_uuid; + + // Cloud Storage config + GcpOptions gcpOptions = PipelineOptionsFactory.create().as(GcpOptions.class); + gcpOptions.setProject(projectId); + gcsUtil = new GcsUtil.GcsUtilFactory().create(gcpOptions); + + // Bigtable config + connection = BigtableConfiguration.connect(projectId, instanceId); + tableId = "test_" + UUID.randomUUID().toString(); + + System.out.println("Setting up integration tests"); + + String[] keys = new String[] {"1", "2", "3", "4", "5", "6", "7", "8", "9"}; + keySplits = new byte[keys.length][]; + for (int i = 0; i < keys.length; i++) { + keySplits[i] = keys[i].getBytes(); + } + } + + private static String getTestProperty(String name) { + return checkNotNull(System.getProperty(name), "Required property missing: " + name); + } + + @After + public void teardown() throws IOException { + final List paths = gcsUtil.expand(GcsPath.fromUri(restoreDir + "/*")); + + if (!paths.isEmpty()) { + final List pathStrs = new ArrayList<>(); + + for (GcsPath path : paths) { + pathStrs.add(path.toString()); + } + this.gcsUtil.remove(pathStrs); + } + + connection.close(); + + // delete test table + BigtableConfiguration.connect(projectId, instanceId) + .getAdmin() + .deleteTable(TableName.valueOf(tableId)); + } + + @Test + public void testHBaseSnapshotImport() throws Exception { + + try (Connection connection = BigtableConfiguration.connect(projectId, instanceId)) { + // Crete table + System.out.println("DEBUG (create test table) ==>"); + TableName tableName = TableName.valueOf(tableId); + HTableDescriptor descriptor = new HTableDescriptor(tableName); + + descriptor.addFamily(new HColumnDescriptor(CF)); + + connection.getAdmin().createTable(descriptor, SnapshotTestingUtils.getSplitKeys()); + + // Start import + System.out.println("DEBUG (import snapshot) ==>"); + DataflowPipelineOptions importPipelineOpts = + PipelineOptionsFactory.as(DataflowPipelineOptions.class); + importPipelineOpts.setRunner(DirectRunner.class); + importPipelineOpts.setGcpTempLocation(dataflowStagingLocation); + importPipelineOpts.setNumWorkers(1); + importPipelineOpts.setProject(projectId); + + ImportJobFromHbaseSnapshot.ImportOptions importOpts = + importPipelineOpts.as(ImportJobFromHbaseSnapshot.ImportOptions.class); + // setup GCP and bigtable + importOpts.setBigtableProject(StaticValueProvider.of(projectId)); + importOpts.setBigtableInstanceId(StaticValueProvider.of(instanceId)); + importOpts.setBigtableTableId(StaticValueProvider.of(tableId)); + 
importOpts.setBigtableAppProfileId(null); + + // setup Hbase snapshot info + importOpts.setHbaseRootDir(StaticValueProvider.of(hbaseSnapshotDir)); + importOpts.setRestoreDir(StaticValueProvider.of(restoreDir)); + importOpts.setSnapshotName(StaticValueProvider.of(TEST_SNAPSHOT_NAME)); + + // run pipeline + State state = ImportJobFromHbaseSnapshot.buildPipeline(importOpts).run().waitUntilFinish(); + Assert.assertEquals(State.DONE, state); + + // check data in bigtable + BigtableTableUtils destTable = new BigtableTableUtils(connection, tableId, CF); + Assert.assertEquals( + 100 /* There are 100 rows in test snapshot*/, + destTable.readAllCellsFromTable().toArray().length); + } + } +} diff --git a/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigurationTest.java b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigurationTest.java new file mode 100644 index 0000000000..d5290a8562 --- /dev/null +++ b/bigtable-dataflow-parent/bigtable-beam-import/src/test/java/com/google/cloud/bigtable/beam/hbasesnapshots/HBaseSnapshotInputConfigurationTest.java @@ -0,0 +1,51 @@ +/* + * Copyright 2020 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.beam.hbasesnapshots; + +import static org.junit.Assert.assertEquals; + +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat; +import org.apache.hadoop.mapreduce.InputFormat; +import org.junit.Test; + +public class HBaseSnapshotInputConfigurationTest { + + private static final String TEST_PROJECT = "test_project"; + private static final String TEST_SNAPSHOT_DIR = "gs://test-bucket/hbase-export"; + private static final String TEST_SNAPSHOT_NAME = "test_snapshot"; + private static final String TEST_RESTORE_DIR = "gs://test-bucket/hbase-restore"; + + @Test + public void testBuildingHBaseSnapshotInputConfiguration() { + Configuration conf = + new HBaseSnapshotInputConfiguration( + ValueProvider.StaticValueProvider.of(TEST_PROJECT), + ValueProvider.StaticValueProvider.of(TEST_SNAPSHOT_DIR), + ValueProvider.StaticValueProvider.of(TEST_SNAPSHOT_NAME), + ValueProvider.StaticValueProvider.of(TEST_RESTORE_DIR)) + .getHbaseConf(); + assertEquals( + "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS", conf.get("fs.AbstractFileSystem.gs.impl")); + assertEquals(TEST_PROJECT, conf.get("fs.gs.project.id")); + assertEquals(TEST_SNAPSHOT_DIR, conf.get("hbase.rootdir")); + assertEquals( + TableSnapshotInputFormat.class, + conf.getClass( + "mapreduce.job.inputformat.class", TableSnapshotInputFormat.class, InputFormat.class)); + } +} diff --git a/pom.xml b/pom.xml index 1599fa5352..d3e41ebf20 100644 --- a/pom.xml +++ b/pom.xml @@ -76,10 +76,10 @@ limitations under the License. 1.3 3.6.28 - 2.19.0 + 2.24.0 30.0-android - 20.0 + 29.0-jre 1.7.4