HBASE-6392 UnknownRegionException blocks hbck from sidelining big overlap regions
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1363190 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7743260acc
commit
0da654d45f
|
@ -1288,6 +1288,7 @@ public class HBaseFsck {
|
|||
* the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
|
||||
* restart or failover may be required.
|
||||
*/
|
||||
@SuppressWarnings("deprecation")
|
||||
private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
|
||||
if (hi.metaEntry == null && hi.hdfsEntry == null) {
|
||||
undeployRegions(hi);
|
||||
|
@ -1901,22 +1902,22 @@ public class HBaseFsck {
|
|||
LOG.debug("Contained region dir before close");
|
||||
debugLsr(hi.getHdfsRegionDir());
|
||||
try {
|
||||
LOG.info("Closing region: " + hi);
|
||||
closeRegion(hi);
|
||||
} catch (IOException ioe) {
|
||||
// TODO exercise this
|
||||
LOG.warn("Was unable to close region " + hi.getRegionNameAsString()
|
||||
+ ". Just continuing... ");
|
||||
LOG.warn("Was unable to close region " + hi
|
||||
+ ". Just continuing... ", ioe);
|
||||
} catch (InterruptedException e) {
|
||||
// TODO exercise this
|
||||
LOG.warn("Was unable to close region " + hi.getRegionNameAsString()
|
||||
+ ". Just continuing... ");
|
||||
LOG.warn("Was unable to close region " + hi
|
||||
+ ". Just continuing... ", e);
|
||||
}
|
||||
|
||||
try {
|
||||
LOG.info("Offlining region: " + hi);
|
||||
offline(hi.getRegionName());
|
||||
} catch (IOException ioe) {
|
||||
LOG.warn("Unable to offline region from master: " + hi, ioe);
|
||||
LOG.warn("Unable to offline region from master: " + hi
|
||||
+ ". Just continuing... ", ioe);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1965,14 +1966,21 @@ public class HBaseFsck {
|
|||
try {
|
||||
LOG.info("Closing region: " + regionToSideline);
|
||||
closeRegion(regionToSideline);
|
||||
} catch (InterruptedException ie) {
|
||||
LOG.warn("Was unable to close region " + regionToSideline.getRegionNameAsString()
|
||||
+ ". Interrupted.");
|
||||
throw new IOException(ie);
|
||||
} catch (IOException ioe) {
|
||||
LOG.warn("Was unable to close region " + regionToSideline
|
||||
+ ". Just continuing... ", ioe);
|
||||
} catch (InterruptedException e) {
|
||||
LOG.warn("Was unable to close region " + regionToSideline
|
||||
+ ". Just continuing... ", e);
|
||||
}
|
||||
|
||||
LOG.info("Offlining region: " + regionToSideline);
|
||||
offline(regionToSideline.getRegionName());
|
||||
try {
|
||||
LOG.info("Offlining region: " + regionToSideline);
|
||||
offline(regionToSideline.getRegionName());
|
||||
} catch (IOException ioe) {
|
||||
LOG.warn("Unable to offline region from master: " + regionToSideline
|
||||
+ ". Just continuing... ", ioe);
|
||||
}
|
||||
|
||||
LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
|
||||
Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
|
||||
|
|
|
@ -24,6 +24,7 @@ import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
|
|||
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
|
@ -62,9 +63,11 @@ import org.apache.hadoop.hbase.client.Result;
|
|||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
|
||||
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||
|
@ -74,6 +77,8 @@ import org.junit.BeforeClass;
|
|||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
|
||||
import com.google.common.collect.Multimap;
|
||||
|
||||
/**
|
||||
* This tests HBaseFsck's ability to detect reasons for inconsistent tables.
|
||||
*/
|
||||
|
@ -483,8 +488,7 @@ public class TestHBaseFsck {
|
|||
// differentiate on ts/regionId! We actually need to recheck
|
||||
// deployments!
|
||||
HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
|
||||
ServerName hsi;
|
||||
while ( (hsi = findDeployedHSI(getDeployedHRIs(admin), hriDupe)) == null) {
|
||||
while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
|
||||
Thread.sleep(250);
|
||||
}
|
||||
|
||||
|
@ -547,7 +551,6 @@ public class TestHBaseFsck {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This creates and fixes a bad table where a region is completely contained
|
||||
* by another region.
|
||||
|
@ -585,6 +588,98 @@ public class TestHBaseFsck {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates and fixes a bad table that has an overlap group of
|
||||
* 3 regions. Set HBaseFsck.maxMerge to 2 to trigger sidelining of an overlapped
|
||||
* region. Mess up the metadata so that closeRegion/offlineRegion
|
||||
* throw exceptions.
|
||||
*/
|
||||
@Test
|
||||
public void testSidelineOverlapRegion() throws Exception {
|
||||
String table = "testSidelineOverlapRegion";
|
||||
try {
|
||||
setupTable(table);
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Mess it up by creating an overlap
|
||||
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
|
||||
HMaster master = cluster.getMaster();
|
||||
HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(),
|
||||
Bytes.toBytes("A"), Bytes.toBytes("AB"));
|
||||
master.assignRegion(hriOverlap1);
|
||||
master.getAssignmentManager().waitForAssignment(hriOverlap1);
|
||||
HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(),
|
||||
Bytes.toBytes("AB"), Bytes.toBytes("B"));
|
||||
master.assignRegion(hriOverlap2);
|
||||
master.getAssignmentManager().waitForAssignment(hriOverlap2);
|
||||
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
|
||||
ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
|
||||
assertEquals(3, hbck.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// mess up the overlapped regions to trigger NotServingRegionException
|
||||
Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
|
||||
ServerName serverName = null;
|
||||
byte[] regionName = null;
|
||||
for (HbckInfo hbi: overlapGroups.values()) {
|
||||
if ("A".equals(Bytes.toString(hbi.getStartKey()))
|
||||
&& "B".equals(Bytes.toString(hbi.getEndKey()))) {
|
||||
regionName = hbi.getRegionName();
|
||||
|
||||
// get an RS not serving the region to force bad assignment info into META.
|
||||
int k = cluster.getServerWith(regionName);
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (i != k) {
|
||||
HRegionServer rs = cluster.getRegionServer(i);
|
||||
serverName = rs.getServerName();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
|
||||
HBaseFsckRepair.closeRegionSilentlyAndWait(admin,
|
||||
cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
|
||||
admin.offline(regionName);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assertNotNull(regionName);
|
||||
assertNotNull(serverName);
|
||||
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
Put put = new Put(regionName);
|
||||
put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
|
||||
Bytes.toBytes(serverName.getHostAndPort()));
|
||||
meta.put(put);
|
||||
|
||||
// fix the problem.
|
||||
HBaseFsck fsck = new HBaseFsck(conf);
|
||||
fsck.connect();
|
||||
fsck.setDisplayFullReport(); // i.e. -details
|
||||
fsck.setTimeLag(0);
|
||||
fsck.setFixAssignments(true);
|
||||
fsck.setFixMeta(true);
|
||||
fsck.setFixHdfsHoles(true);
|
||||
fsck.setFixHdfsOverlaps(true);
|
||||
fsck.setFixHdfsOrphans(true);
|
||||
fsck.setFixVersionFile(true);
|
||||
fsck.setSidelineBigOverlaps(true);
|
||||
fsck.setMaxMerge(2);
|
||||
fsck.onlineHbck();
|
||||
|
||||
// verify that overlaps are fixed, and there are fewer rows
|
||||
// since one region is sidelined.
|
||||
HBaseFsck hbck2 = doFsck(conf,false);
|
||||
assertNoErrors(hbck2);
|
||||
assertEquals(0, hbck2.getOverlapGroups(table).size());
|
||||
assertTrue(ROWKEYS.length > countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates and fixes a bad table where a region is completely contained
|
||||
* by another region, and there is a hole (sort of like a bad split)
|
||||
|
|
Loading…
Reference in New Issue