HBASE-27848: Should fast-fail if unmatched column family exists when using ImportTsv (#5225)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
(cherry picked from commit ce29f97a80)
This commit is contained in:
guluo 2023-05-21 19:47:27 +08:00 committed by Duo Zhang
parent bf07ff4013
commit 01fb688a4a
2 changed files with 39 additions and 0 deletions

View File

@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Pair;
@ -554,6 +555,22 @@ public class ImportTsv extends Configured implements Tool {
LOG.error(errorMsg); LOG.error(errorMsg);
throw new TableNotFoundException(errorMsg); throw new TableNotFoundException(errorMsg);
} }
// Fail fast: check that every column family named in the column spec exists on the
// target table before going any further. The Table handle is only needed for this
// check and is closed by the try-with-resources.
try (Table table = connection.getTable(tableName)) {
// Families requested in the column spec that the table descriptor does not contain.
ArrayList<String> unmatchedFamilies = new ArrayList<>();
// NOTE(review): presumably the distinct family names parsed from `columns`;
// confirm against getColumnFamilies.
Set<String> cfSet = getColumnFamilies(columns);
TableDescriptor tDesc = table.getDescriptor();
for (String cf : cfSet) {
if (!tDesc.hasColumnFamily(Bytes.toBytes(cf))) {
unmatchedFamilies.add(cf);
}
}
// Collect all missing families first so that a single error reports every one of them.
if (unmatchedFamilies.size() > 0) {
String noSuchColumnFamiliesMsg =
format("Column families: %s do not exist.", unmatchedFamilies);
LOG.error(noSuchColumnFamiliesMsg);
throw new NoSuchColumnFamilyException(noSuchColumnFamiliesMsg);
}
}
if (mapperClass.equals(TsvImporterTextMapper.class)) { if (mapperClass.equals(TsvImporterTextMapper.class)) {
usage(TsvImporterTextMapper.class.toString() usage(TsvImporterTextMapper.class.toString()
+ " should not be used for non bulkloading case. use " + " should not be used for non bulkloading case. use "

View File

@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests; import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
@ -241,6 +242,27 @@ public class TestImportTsv implements Configurable {
}, args)); }, args));
} }
/**
 * Verifies that job setup is rejected with {@link NoSuchColumnFamilyException} when the
 * column spec names column families that the target table does not have.
 * <p>
 * The table is created with only {@code FAMILY}, while the column spec additionally names
 * {@code FAM01_ERROR} and {@code FAM02_ERROR}.
 * @throws Exception the expected {@link NoSuchColumnFamilyException}, or any unexpected
 *                   failure while creating the table or configuring the job
 */
@Test
public void testMRNoMatchedColumnFamily() throws Exception {
  util.createTable(tn, FAMILY);
  try {
    String[] args = new String[] {
      "-D" + ImportTsv.COLUMNS_CONF_KEY
        + "=HBASE_ROW_KEY,FAM:A,FAM01_ERROR:A,FAM01_ERROR:B,FAM02_ERROR:C",
      tn.getNameAsString(), "/inputFile" };
    exception.expect(NoSuchColumnFamilyException.class);
    // Only job configuration is exercised: run() is overridden so that no job is submitted.
    assertEquals("running test job configuration failed.", 0,
      ToolRunner.run(new Configuration(util.getConfiguration()), new ImportTsv() {
        @Override
        public int run(String[] args) throws Exception {
          createSubmittableJob(getConf(), args);
          return 0;
        }
      }, args));
  } finally {
    // The expected exception propagates out of ToolRunner.run, so cleanup placed after the
    // assertion would never execute; drop the table here instead.
    util.deleteTable(tn);
  }
}
@Test @Test
public void testMRWithoutAnExistingTable() throws Exception { public void testMRWithoutAnExistingTable() throws Exception {
String[] args = new String[] { tn.getNameAsString(), "/inputFile" }; String[] args = new String[] { tn.getNameAsString(), "/inputFile" };