hbase-9512: Regions sometimes can't get out of the InRecovery state when distributedLogReplay is turned off and the cluster is restarted

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1523407 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
jeffreyz 2013-09-15 06:13:22 +00:00
parent 270aad5d55
commit b9ad5e8413
3 changed files with 31 additions and 1 deletions

View File

@ -572,6 +572,9 @@ public class SplitLogManager extends ZooKeeperListener {
throws KeeperException {
if (!this.distributedLogReplay) {
// remove any regions in recovery from ZK which could happen when we turn the feature on
// and later turn it off
ZKUtil.deleteChildrenRecursively(watcher, watcher.recoveringRegionsZNode);
// the function is only used in distributedLogReplay mode when master is in initialization
return;
}

View File

@ -3571,7 +3571,8 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa
if (previous == null) {
// check if the region to be opened is marked in recovering state in ZK
if (SplitLogManager.isRegionMarkedRecoveringInZK(this.getZooKeeper(),
if (this.distributedLogReplay
&& SplitLogManager.isRegionMarkedRecoveringInZK(this.getZooKeeper(),
region.getEncodedName())) {
this.recoveringRegions.put(region.getEncodedName(), null);
}

View File

@ -39,6 +39,7 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicLong;
@ -48,6 +49,8 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SplitLogTask;
@ -532,4 +535,27 @@ public class TestSplitLogManager {
fs.delete(logDir, true);
}
}
/**
 * Checks that no recovering-region znodes remain in ZooKeeper once distributedLogReplay is
 * switched off. The test plants a znode for the first meta region under the recovering-regions
 * parent, asks a freshly built {@code SplitLogManager} to remove stale recovering regions, and
 * then expects the parent znode to have no children.
 * @throws Exception if any ZooKeeper operation or the manager call fails
 */
@Test(timeout = 300000)
public void testRecoveryRegionRemovedFromZK() throws Exception {
  LOG.info("testRecoveryRegionRemovedFromZK");

  // Run the manager with distributed log replay disabled.
  conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);

  // Plant a leftover recovering-region marker for the first meta region.
  final String staleZNode = ZKUtil.joinZNode(
      zkw.recoveringRegionsZNode, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
  final byte[] markerData = ZKUtil.positionToByteArray(0L);
  ZKUtil.createSetData(zkw, staleZNode, markerData);

  // Build the manager under test and trigger the cleanup.
  slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER);
  slm.removeStaleRecoveringRegionsFromZK(null);

  // The recovering-regions parent must now be empty.
  final List<String> leftoverRegions =
      zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
  final boolean cleaned = leftoverRegions.isEmpty();
  assertTrue("Recovery regions isn't cleaned", cleaned);
}
}