From 01fb688a4a90303c56335d1150e26a336e429054 Mon Sep 17 00:00:00 2001 From: guluo Date: Sun, 21 May 2023 19:47:27 +0800 Subject: [PATCH] HBASE-27848:Should fast-fail if unmatched column family exists when using ImportTsv (#5225) Signed-off-by: Duo Zhang (cherry picked from commit ce29f97a809a849bf067fa3571fd775fb596fc10) --- .../hadoop/hbase/mapreduce/ImportTsv.java | 17 ++++++++++++++ .../hadoop/hbase/mapreduce/TestImportTsv.java | 22 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java index 665ff93a977..d7833fabeaf 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/ImportTsv.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.client.RegionLocator; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; @@ -554,6 +555,22 @@ public class ImportTsv extends Configured implements Tool { LOG.error(errorMsg); throw new TableNotFoundException(errorMsg); } + try (Table table = connection.getTable(tableName)) { + ArrayList unmatchedFamilies = new ArrayList<>(); + Set cfSet = getColumnFamilies(columns); + TableDescriptor tDesc = table.getDescriptor(); + for (String cf : cfSet) { + if (!tDesc.hasColumnFamily(Bytes.toBytes(cf))) { + unmatchedFamilies.add(cf); + } + } + if (unmatchedFamilies.size() > 0) { + String noSuchColumnFamiliesMsg = + format("Column families: %s do not exist.", unmatchedFamilies); + LOG.error(noSuchColumnFamiliesMsg); + throw new NoSuchColumnFamilyException(noSuchColumnFamiliesMsg); + } + } if (mapperClass.equals(TsvImporterTextMapper.class)) { usage(TsvImporterTextMapper.class.toString() + " should not be used for non bulkloading case. use " diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java index 737ae178b63..8a30e404cff 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportTsv.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; +import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests; import org.apache.hadoop.hbase.util.Bytes; @@ -241,6 +242,27 @@ public class TestImportTsv implements Configurable { }, args)); } + @Test + public void testMRNoMatchedColumnFamily() throws Exception { + util.createTable(tn, FAMILY); + + String[] args = new String[] { + "-D" + ImportTsv.COLUMNS_CONF_KEY + + "=HBASE_ROW_KEY,FAM:A,FAM01_ERROR:A,FAM01_ERROR:B,FAM02_ERROR:C", + tn.getNameAsString(), "/inputFile" }; + exception.expect(NoSuchColumnFamilyException.class); + assertEquals("running test job configuration failed.", 0, + ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() { + @Override + public int run(String[] args) throws Exception { + createSubmittableJob(getConf(), args); + return 0; + } + }, args)); + + util.deleteTable(tn); + } + @Test public void testMRWithoutAnExistingTable() throws Exception { String[] args = new String[] { tn.getNameAsString(), "/inputFile" };