HBASE-23192 CatalogJanitor consistencyCheck does not log problematic row on exception (#734)

Logs the problematic row and the exception when the consistency check
fails during a CatalogJanitor (CJ) scan. Adds a few more null checks.
Edits the top line of the 'HBCK Report' page.

Signed-off-by: Reid Chan <reidchan@apache.org>
This commit is contained in:
Michael Stack 2019-10-30 13:10:19 -07:00 committed by GitHub
parent 538a4c51ff
commit 54425bf87b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 3 deletions

View File

@ -571,7 +571,12 @@ public class CatalogJanitor extends ScheduledChore {
return true;
}
this.report.count++;
RegionInfo regionInfo = metaTableConsistencyCheck(r);
RegionInfo regionInfo = null;
try {
regionInfo = metaTableConsistencyCheck(r);
} catch(Throwable t) {
LOG.warn("Failed consistency check on {}", Bytes.toStringBinary(r.getRow()), t);
}
if (regionInfo != null) {
LOG.trace(regionInfo.toString());
if (regionInfo.isSplitParent()) { // splitParent means split and offline.
@ -695,8 +700,14 @@ public class CatalogJanitor extends ScheduledChore {
if (locations == null) {
return;
}
if (locations.getRegionLocations() == null) {
return;
}
// Check referenced servers are known/online.
for (HRegionLocation location: locations.getRegionLocations()) {
if (location == null) {
continue;
}
ServerName sn = location.getServerName();
if (sn == null) {
continue;

View File

@ -78,7 +78,7 @@
<div class="row">
<div class="page-header">
<p><span>This page displays two reports. Only the report titles show if reports are empty.</span></p>
<p><span>This page displays two reports: the 'HBCK Chore Report' and the 'CatalogJanitor Consistency Issues' report. Only titles show if there are no problems to report. Note some conditions are transitory as regions migrate.</span></p>
</div>
</div>
<div class="row">

View File

@ -43,9 +43,12 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Category({MasterTests.class, LargeTests.class})
public class TestCatalogJanitorCluster {
private static final Logger LOG = LoggerFactory.getLogger(TestCatalogJanitorCluster.class);
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestCatalogJanitorCluster.class);
@ -130,7 +133,18 @@ public class TestCatalogJanitorCluster {
report = janitor.getLastReport();
assertFalse(report.isEmpty());
assertEquals(1, report.getUnknownServers().size());
// Finally, make an empty regioninfo in t1.
// Test what happens if the info:server value of a row is blown away (set to an empty value).
// This should not kill CJ, and the row that had the problem should be logged. See HBASE-23192.
// Here we just make sure that nothing breaks if this happens.
LOG.info("Make null info:server");
Put emptyInfoServerPut = new Put(t1Ri1.getRegionName());
emptyInfoServerPut.addColumn(MetaTableAccessor.getCatalogFamily(),
MetaTableAccessor.getServerColumn(0), Bytes.toBytes(""));
MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(emptyInfoServerPut));
gc = janitor.scan();
report = janitor.getLastReport();
assertEquals(0, report.getUnknownServers().size());
// Make an empty regioninfo in t1.
RegionInfo t1Ri2 = t1Ris.get(2);
Put pEmptyRI = new Put(t1Ri2.getRegionName());
pEmptyRI.addColumn(MetaTableAccessor.getCatalogFamily(),