From 481662ab39f8803849001b002bc5a8470f667d1e Mon Sep 17 00:00:00 2001
From: Mate Szalay-Beko
Date: Tue, 5 Jan 2021 09:24:24 +0100
Subject: [PATCH] HBASE-25318 Config option for IntegrationTestImportTsv where
 to generate HFiles to bulkload (#2777)

IntegrationTestImportTsv is generating HFiles under the working directory of the
current hdfs user executing the tool, before bulkloading them into HBase.

Assuming you encrypt the HBase root directory within HDFS (using HDFS
Transparent Encryption), you can bulkload HFiles only if they sit in the same
encryption zone in HDFS as the HBase root directory itself.

When IntegrationTestImportTsv is executed against a real distributed cluster
and the working directory of the current user (e.g. /user/hbase) is not in the
same encryption zone as the HBase root directory (e.g. /hbase/data) then you
will get an exception:

```
ERROR org.apache.hadoop.hbase.regionserver.HRegion: There was a partial failure
due to IO when attempting to load d :
hdfs://mycluster/user/hbase/test-data/22d8460d-04cc-e032-88ca-2cc20a7dd01c/
IntegrationTestImportTsv/hfiles/d/74655e3f8da142cb94bc31b64f0475cc

org.apache.hadoop.ipc.RemoteException(java.io.IOException):
/user/hbase/test-data/22d8460d-04cc-e032-88ca-2cc20a7dd01c/
IntegrationTestImportTsv/hfiles/d/74655e3f8da142cb94bc31b64f0475cc
can't be moved into an encryption zone.
```

In this commit I make it configurable where the IntegrationTestImportTsv
generates the HFiles.

Co-authored-by: Mate Szalay-Beko
Signed-off-by: Peter Somogyi
---
 .../mapreduce/IntegrationTestImportTsv.java   | 36 +++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
index c80d61c4ea6..28b4ae467dd 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -29,6 +30,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.UUID;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
@@ -66,6 +68,8 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
 
   private static final String NAME = IntegrationTestImportTsv.class.getSimpleName();
   private static final Logger LOG = LoggerFactory.getLogger(IntegrationTestImportTsv.class);
+  private static final String GENERATED_HFILE_FOLDER_PARAM_KEY =
+      "IntegrationTestImportTsv.generatedHFileFolder";
 
   protected static final String simple_tsv =
       "row1\t1\tc1\tc2\n" +
@@ -190,8 +194,8 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
   void generateAndLoad(final TableName table) throws Exception {
     LOG.info("Running test testGenerateAndLoad.");
     String cf = "d";
-    Path hfiles = new Path(
-        util.getDataTestDirOnTestFS(table.getNameAsString()), "hfiles");
+    Path hfiles = initGeneratedHFilePath(table);
+    LOG.info("The folder where the HFiles will be generated: {}", hfiles.toString());
 
     Map args = new HashMap<>();
     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
@@ -220,6 +224,12 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
       System.err.println(format("%s [genericOptions]", NAME));
       System.err.println(" Runs ImportTsv integration tests against a distributed cluster.");
       System.err.println();
+      System.err.println(" Use '-D" + GENERATED_HFILE_FOLDER_PARAM_KEY + "=<path>' to define a");
+      System.err.println(" base folder for the generated HFiles. If HDFS Transparent Encryption");
+      System.err.println(" is configured, then make sure to set this parameter to a folder in");
+      System.err.println(" the same encryption zone in HDFS as the HBase root directory,");
+      System.err.println(" otherwise the bulkload will fail.");
+      System.err.println();
       ToolRunner.printGenericCommandUsage(System.err);
       return 1;
     }
@@ -237,6 +247,28 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
     return 0;
   }
 
+  private Path initGeneratedHFilePath(final TableName table) throws IOException {
+    String folderParam = getConf().getTrimmed(GENERATED_HFILE_FOLDER_PARAM_KEY);
+    if (folderParam == null || folderParam.isEmpty()) {
+      // by default, fall back to the test data dir
+      return new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "hfiles");
+    }
+
+    Path hfiles = new Path(folderParam, UUID.randomUUID().toString());
+    FileSystem fs = util.getTestFileSystem();
+    String shouldPreserve = System.getProperty("hbase.testing.preserve.testdir", "false");
+    if (!Boolean.parseBoolean(shouldPreserve)) {
+      if (fs.getUri().getScheme().equals(FileSystem.getLocal(getConf()).getUri().getScheme())) {
+        File localFolder = new File(hfiles.toUri().getPath());
+        localFolder.deleteOnExit();
+      } else {
+        fs.deleteOnExit(hfiles);
+      }
+    }
+    return hfiles;
+  }
+
+
   public static void main(String[] args) throws Exception {
     Configuration conf = HBaseConfiguration.create();
     IntegrationTestingUtility.setUseDistributedCluster(conf);