HBASE-23192 CatalogJanitor consistencyCheck does not log problematic row on exception (#734)
Adds logging of the row and the complaint if the consistency check fails during CatalogJanitor (CJ) checking. Adds a few more null checks. Edits the top line of the 'HBCK Report' page. Signed-off-by: Reid Chan <reidchan@apache.org>
This commit is contained in:
parent
538a4c51ff
commit
54425bf87b
|
@ -571,7 +571,12 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
return true;
|
||||
}
|
||||
this.report.count++;
|
||||
RegionInfo regionInfo = metaTableConsistencyCheck(r);
|
||||
RegionInfo regionInfo = null;
|
||||
try {
|
||||
regionInfo = metaTableConsistencyCheck(r);
|
||||
} catch(Throwable t) {
|
||||
LOG.warn("Failed consistency check on {}", Bytes.toStringBinary(r.getRow()), t);
|
||||
}
|
||||
if (regionInfo != null) {
|
||||
LOG.trace(regionInfo.toString());
|
||||
if (regionInfo.isSplitParent()) { // splitParent means split and offline.
|
||||
|
@ -695,8 +700,14 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
if (locations == null) {
|
||||
return;
|
||||
}
|
||||
if (locations.getRegionLocations() == null) {
|
||||
return;
|
||||
}
|
||||
// Check referenced servers are known/online.
|
||||
for (HRegionLocation location: locations.getRegionLocations()) {
|
||||
if (location == null) {
|
||||
continue;
|
||||
}
|
||||
ServerName sn = location.getServerName();
|
||||
if (sn == null) {
|
||||
continue;
|
||||
|
|
|
@ -78,7 +78,7 @@
|
|||
|
||||
<div class="row">
|
||||
<div class="page-header">
|
||||
<p><span>This page displays two reports. Only the report titles show if reports are empty.</span></p>
|
||||
<p><span>This page displays two reports: the 'HBCK Chore Report' and the 'CatalogJanitor Consistency Issues' report. Only titles show if there are no problems to report. Note some conditions are transitory as regions migrate.</span></p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
|
|
|
@ -43,9 +43,12 @@ import org.junit.Rule;
|
|||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.junit.rules.TestName;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@Category({MasterTests.class, LargeTests.class})
|
||||
public class TestCatalogJanitorCluster {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(TestCatalogJanitorCluster.class);
|
||||
@ClassRule
|
||||
public static final HBaseClassTestRule CLASS_RULE =
|
||||
HBaseClassTestRule.forClass(TestCatalogJanitorCluster.class);
|
||||
|
@ -130,7 +133,18 @@ public class TestCatalogJanitorCluster {
|
|||
report = janitor.getLastReport();
|
||||
assertFalse(report.isEmpty());
|
||||
assertEquals(1, report.getUnknownServers().size());
|
||||
// Finally, make an empty regioninfo in t1.
|
||||
// Test what happens if we blow away an info:server row, if it is null. Should not kill CJ
|
||||
// and we should log the row that had the problem. HBASE-23192. Just make sure we don't
|
||||
// break if this happens.
|
||||
LOG.info("Make null info:server");
|
||||
Put emptyInfoServerPut = new Put(t1Ri1.getRegionName());
|
||||
emptyInfoServerPut.addColumn(MetaTableAccessor.getCatalogFamily(),
|
||||
MetaTableAccessor.getServerColumn(0), Bytes.toBytes(""));
|
||||
MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(emptyInfoServerPut));
|
||||
gc = janitor.scan();
|
||||
report = janitor.getLastReport();
|
||||
assertEquals(0, report.getUnknownServers().size());
|
||||
// Make an empty regioninfo in t1.
|
||||
RegionInfo t1Ri2 = t1Ris.get(2);
|
||||
Put pEmptyRI = new Put(t1Ri2.getRegionName());
|
||||
pEmptyRI.addColumn(MetaTableAccessor.getCatalogFamily(),
|
||||
|
|
Loading…
Reference in New Issue