HBASE-25318 Config option for IntegrationTestImportTsv where to generate HFiles to bulkload (#2777)

IntegrationTestImportTsv generates HFiles under the working directory of the
current HDFS user executing the tool, before bulk loading them into HBase.
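
Under the hood, the test drives ImportTsv with a bulk-output directory. For
illustration, a comparable standalone invocation would look something like the
following (the table name, column mapping, and paths are placeholders;
importtsv.bulk.output is the configuration key behind
ImportTsv.BULK_OUTPUT_CONF_KEY):

```
# write HFiles to an HDFS folder instead of loading rows directly
hbase org.apache.hadoop.hbase.mapreduce.ImportTsv \
  -Dimporttsv.columns=HBASE_ROW_KEY,d:c1,d:c2 \
  -Dimporttsv.bulk.output=/user/hbase/hfiles \
  mytable /path/to/input.tsv
```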

If you encrypt the HBase root directory within HDFS (using HDFS Transparent
Encryption), you can bulk load HFiles only if they sit in the same HDFS
encryption zone as the HBase root directory itself.
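
For context, an encryption zone is set up with the standard Hadoop key and
crypto CLIs. A minimal sketch (assuming a Hadoop KMS is already configured;
the key name and path are placeholders, not part of this change):

```
# create an encryption key in the KMS (key name is illustrative)
hadoop key create hbase-key

# turn an (empty) directory into an encryption zone backed by that key
hdfs crypto -createZone -keyName hbase-key -path /hbase

# verify the configured zones
hdfs crypto -listZones
```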

When IntegrationTestImportTsv is executed against a real distributed cluster
and the working directory of the current user (e.g. /user/hbase) is not in the
same encryption zone as the HBase root directory (e.g. /hbase/data), you will
get an exception:

```
ERROR org.apache.hadoop.hbase.regionserver.HRegion: There was a partial failure
due to IO when attempting to load d :
hdfs://mycluster/user/hbase/test-data/22d8460d-04cc-e032-88ca-2cc20a7dd01c/
IntegrationTestImportTsv/hfiles/d/74655e3f8da142cb94bc31b64f0475cc

org.apache.hadoop.ipc.RemoteException(java.io.IOException):
/user/hbase/test-data/22d8460d-04cc-e032-88ca-2cc20a7dd01c/
IntegrationTestImportTsv/hfiles/d/74655e3f8da142cb94bc31b64f0475cc
can't be moved into an encryption zone.
```

In this commit I make it configurable where IntegrationTestImportTsv
generates the HFiles.
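
For example, the test could then be launched along these lines (a hypothetical
invocation: the package name is the test's usual location in hbase-it, and the
output path is a placeholder inside the HBase encryption zone; the parameter
key itself comes from this change):

```
# generate the HFiles under a folder in the same encryption zone as hbase.rootdir
hbase org.apache.hadoop.hbase.mapreduce.IntegrationTestImportTsv \
  -DIntegrationTestImportTsv.generatedHFileFolder=/hbase/staging-hfiles
```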

Co-authored-by: Mate Szalay-Beko <symat@apache.org>
Signed-off-by: Peter Somogyi <psomogyi@apache.org>
1 changed file with 34 additions and 2 deletions:


```diff
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -29,6 +30,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.UUID;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
@@ -66,6 +68,8 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
   private static final String NAME = IntegrationTestImportTsv.class.getSimpleName();
   private static final Logger LOG = LoggerFactory.getLogger(IntegrationTestImportTsv.class);
+  private static final String GENERATED_HFILE_FOLDER_PARAM_KEY =
+    "IntegrationTestImportTsv.generatedHFileFolder";
 
   protected static final String simple_tsv =
     "row1\t1\tc1\tc2\n" +
@@ -190,8 +194,8 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
   void generateAndLoad(final TableName table) throws Exception {
     LOG.info("Running test testGenerateAndLoad.");
     String cf = "d";
-    Path hfiles = new Path(
-      util.getDataTestDirOnTestFS(table.getNameAsString()), "hfiles");
+    Path hfiles = initGeneratedHFilePath(table);
+    LOG.info("The folder where the HFiles will be generated: {}", hfiles.toString());
 
     Map<String, String> args = new HashMap<>();
     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
@@ -220,6 +224,12 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
       System.err.println(format("%s [genericOptions]", NAME));
       System.err.println("  Runs ImportTsv integration tests against a distributed cluster.");
       System.err.println();
+      System.err.println("  Use '-D" + GENERATED_HFILE_FOLDER_PARAM_KEY + "=<path>' to define a");
+      System.err.println("  base folder for the generated HFiles. If HDFS Transparent Encryption");
+      System.err.println("  is configured, then make sure to set this parameter to a folder in");
+      System.err.println("  the same encryption zone in HDFS as the HBase root directory,");
+      System.err.println("  otherwise the bulkload will fail.");
+      System.err.println();
       ToolRunner.printGenericCommandUsage(System.err);
       return 1;
     }
@@ -237,6 +247,28 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
     return 0;
   }
 
+  private Path initGeneratedHFilePath(final TableName table) throws IOException {
+    String folderParam = getConf().getTrimmed(GENERATED_HFILE_FOLDER_PARAM_KEY);
+    if (folderParam == null || folderParam.isEmpty()) {
+      // by default, fall back to the test data dir
+      return new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "hfiles");
+    }
+
+    Path hfiles = new Path(folderParam, UUID.randomUUID().toString());
+    FileSystem fs = util.getTestFileSystem();
+
+    String shouldPreserve = System.getProperty("hbase.testing.preserve.testdir", "false");
+    if (!Boolean.parseBoolean(shouldPreserve)) {
+      if (fs.getUri().getScheme().equals(FileSystem.getLocal(getConf()).getUri().getScheme())) {
+        File localFolder = new File(hfiles.toString());
+        localFolder.deleteOnExit();
+      } else {
+        fs.deleteOnExit(hfiles);
+      }
+    }
+    return hfiles;
+  }
+
   public static void main(String[] args) throws Exception {
     Configuration conf = HBaseConfiguration.create();
     IntegrationTestingUtility.setUseDistributedCluster(conf);
```