HBASE-4798 Sleeps and synchronisation improvements for tests

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1204769 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-11-22 01:04:40 +00:00
parent bba38d9a5f
commit 23532b3538
13 changed files with 111 additions and 67 deletions

View File

@ -409,7 +409,8 @@ public class AssignmentManager extends ZooKeeperListener {
synchronized(this.regionsInTransition) {
// Keep waiting while the master has not been stopped and the region's
// encoded name is still a key in regionsInTransition.
while (!this.master.isStopped() &&
this.regionsInTransition.containsKey(hri.getEncodedName())) {
this.regionsInTransition.wait();
// We expect a notify, but as a safety net we also set a timeout
this.regionsInTransition.wait(100);
}
}
return intransistion;
@ -1824,7 +1825,10 @@ public class AssignmentManager extends ZooKeeperListener {
throws InterruptedException {
synchronized(regions) {
// Loop until regionInfo shows up as a key in regions.
while(!regions.containsKey(regionInfo)) {
regions.wait();
// We should receive a notification, but it's
// better to have a timeout to recheck the condition here:
// it lowers the impact of a race condition, if any
regions.wait(100);
}
}
}

View File

@ -420,11 +420,11 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
", cluster-up flag was=" + wasUp);
}
// Check if we should stop every second.
// Sleeper with a 1000 period argument, held in a field rather than a local of loop().
private Sleeper stopSleeper = new Sleeper(1000, this);
// Sleeps one Sleeper cycle at a time for as long as the stopped flag is false.
private void loop() {
// Check if we should stop every second.
Sleeper sleeper = new Sleeper(1000, this);
while (!this.stopped) {
sleeper.sleep();
stopSleeper.sleep();
}
}
@ -1504,6 +1504,8 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
public void stop(final String why) {
LOG.info(why);
this.stopped = true;
// We wake up the stopSleeper to stop immediately
stopSleeper.skipSleepCycle();
// If we are a backup master, we need to interrupt wait
if (this.activeMasterManager != null) {
synchronized (this.activeMasterManager.clusterHasActiveMaster) {

View File

@ -967,6 +967,8 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
stop("Failed initialization");
throw convertThrowableToIOE(cleanup(e, "Failed init"),
"Region server startup failed");
} finally {
sleeper.skipSleepCycle();
}
}
@ -1552,9 +1554,14 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
public void stop(final String msg) {
this.stopped = true;
LOG.info("STOPPED: " + msg);
synchronized (this) {
// Wakes run() if it is sleeping
notifyAll(); // FindBugs NN_NAKED_NOTIFY
// Wakes run() if it is sleeping
//sleeper.skipSleepCycle();
//will be uncommented later, see discussion in jira 4798
}
// Blocks the caller until isOnline() or isStopped() returns true,
// calling sleeper.sleep() between successive checks.
public void waitForServerOnline(){
  while (true) {
    // Same evaluation order as a !isOnline() && !isStopped() loop guard:
    // isStopped() is only consulted when the server is not online.
    if (isOnline() || isStopped()) {
      return;
    }
    sleeper.sleep();
  }
}
@ -1722,10 +1729,17 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
*/
private ServerName getMaster() {
ServerName masterServerName = null;
long previousLogTime = 0;
// Poll the master address manager until it returns a non-null master address.
while ((masterServerName = this.masterAddressManager.getMasterAddress()) == null) {
// Give up and return null as soon as keepLooping() returns false.
if (!keepLooping()) return null;
LOG.debug("No master found; retry");
sleeper.sleep();
// Log only when more than 1000 ms have elapsed since previousLogTime.
if (System.currentTimeMillis() > (previousLogTime+1000)){
LOG.debug("No master found; retry");
previousLogTime = System.currentTimeMillis();
}
try {
Thread.sleep(100);
} catch (InterruptedException ignored) {
// NOTE(review): the interrupt is dropped here and the thread's interrupt
// status is not restored - confirm this is intended.
}
}
InetSocketAddress isa =
new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort());
@ -1744,11 +1758,20 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
e = e instanceof RemoteException ?
((RemoteException)e).unwrapRemoteException() : e;
if (e instanceof ServerNotRunningYetException) {
LOG.info("Master isn't available yet, retrying");
if (System.currentTimeMillis() > (previousLogTime+1000)){
LOG.info("Master isn't available yet, retrying");
previousLogTime = System.currentTimeMillis();
}
} else {
LOG.warn("Unable to connect to master. Retrying. Error was:", e);
if (System.currentTimeMillis() > (previousLogTime + 1000)) {
LOG.warn("Unable to connect to master. Retrying. Error was:", e);
previousLogTime = System.currentTimeMillis();
}
}
try {
Thread.sleep(200);
} catch (InterruptedException ignored) {
}
sleeper.sleep();
}
}
LOG.info("Connected to master at " + isa);

View File

@ -62,14 +62,7 @@ public class JVMClusterUtil {
// the HRS#run method. HRS#init can fail for whatever region. In those
// cases, we'll jump out of the run without setting online flag. Check
// stopRequested so we don't wait here a flag that will never be flipped.
while (!this.regionServer.isOnline() &&
!this.regionServer.isStopped()) {
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
// continue waiting
}
}
regionServer.waitForServerOnline();
}
}
@ -118,22 +111,6 @@ public class JVMClusterUtil {
public HMaster getMaster() {
return this.master;
}
/**
* Block until the master has come online, indicating it is ready
* to be used.
*/
public void waitForServerOnline() {
// The server is marked online after init begins but before race to become
// the active master.
while (!this.master.isMasterRunning() && !this.master.isStopped()) {
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
// continue waiting
}
}
}
}
/**
@ -165,20 +142,49 @@ public class JVMClusterUtil {
return new JVMClusterUtil.MasterThread(server, index);
}
// Scans the given master threads in iteration order and returns the first one
// whose master reports isActiveMaster(); returns null when none does.
private static JVMClusterUtil.MasterThread findActiveMaster(
    List<JVMClusterUtil.MasterThread> masters) {
  JVMClusterUtil.MasterThread active = null;
  for (JVMClusterUtil.MasterThread candidate : masters) {
    if (candidate.master.isActiveMaster()) {
      // Stop at the first active master found.
      active = candidate;
      break;
    }
  }
  return active;
}
/**
* Start the cluster. Waits until there is a primary master and returns its
* address.
* Start the cluster. Waits until there is a primary master initialized
* and returns its address.
* @param masters
* @param regionservers
* @return Address to use contacting primary master.
*/
public static String startup(final List<JVMClusterUtil.MasterThread> masters,
final List<JVMClusterUtil.RegionServerThread> regionservers) throws IOException {
if (masters != null) {
for (JVMClusterUtil.MasterThread t : masters) {
t.start();
if (masters == null || masters.isEmpty()) {
return null;
}
for (JVMClusterUtil.MasterThread t : masters) {
t.start();
}
// Wait for an active master.
// Having an active master before starting the region server threads allows
// them to succeed on their connection to the master.
long startTime = System.currentTimeMillis();
while (findActiveMaster(masters) == null) {
try {
Thread.sleep(100);
} catch (InterruptedException ignored) {
}
if (System.currentTimeMillis() > startTime + 30000) {
throw new RuntimeException("Master not active after 30 seconds");
}
}
if (regionservers != null) {
for (JVMClusterUtil.RegionServerThread t: regionservers) {
HRegionServer hrs = t.getRegionServer();
@ -187,19 +193,21 @@ public class JVMClusterUtil {
t.start();
}
}
if (masters == null || masters.isEmpty()) {
return null;
}
// Wait for an active master
// Wait for an active master to be initialized (implies being master)
// with this, when we return the cluster is complete
startTime = System.currentTimeMillis();
while (true) {
for (JVMClusterUtil.MasterThread t : masters) {
if (t.master.isActiveMaster()) {
return t.master.getServerName().toString();
}
JVMClusterUtil.MasterThread t = findActiveMaster(masters);
if (t != null && t.master.isInitialized()) {
return t.master.getServerName().toString();
}
if (System.currentTimeMillis() > startTime + 200000) {
throw new RuntimeException("Master not initialized after 200 seconds");
}
try {
Thread.sleep(1000);
} catch(InterruptedException e) {
Thread.sleep(100);
} catch (InterruptedException ignored) {
// Keep waiting
}
}

View File

@ -62,7 +62,7 @@ public class Sleeper {
public void skipSleepCycle() {
// The flag is set and the notification is sent while holding sleepLock.
synchronized (sleepLock) {
triggerWake = true;
sleepLock.notify();
sleepLock.notifyAll();
}
}

View File

@ -127,11 +127,9 @@ public abstract class ZooKeeperNodeTracker extends ZooKeeperListener {
}
}
// Keep waiting while not stopped, there is either no timeout or time remaining,
// and data is still null.
while (!this.stopped && (notimeout || remaining > 0) && this.data == null) {
if (notimeout) {
wait();
continue;
}
wait(remaining);
// We expect a notification, but we wait with
// a timeout to lower the impact of a race condition, if any
wait(100);
// Recompute the time left from the elapsed wall-clock time.
remaining = timeout - (System.currentTimeMillis() - startTime);
}
return this.data;

View File

@ -1245,7 +1245,7 @@ public class HBaseTestingUtility {
boolean checkStatus) throws Exception {
Configuration c = new Configuration(this.conf);
String quorumServers = ZKConfig.getZKQuorumServersString(c);
int sessionTimeout = 5 * 1000; // 5 seconds
int sessionTimeout = 500;
ZooKeeper zk = nodeZK.getRecoverableZooKeeper().getZooKeeper();
byte[] password = zk.getSessionPasswd();
long sessionID = zk.getSessionId();
@ -1253,7 +1253,7 @@ public class HBaseTestingUtility {
ZooKeeper newZK = new ZooKeeper(quorumServers,
sessionTimeout, EmptyWatcher.instance, sessionID, password);
newZK.close();
final long sleep = sessionTimeout * 5L;
final long sleep = 7000; // 7s seems enough to manage the timeout
LOG.info("ZK Closed Session 0x" + Long.toHexString(sessionID) +
"; sleeping=" + sleep);

View File

@ -293,7 +293,6 @@ public class MiniHBaseCluster {
try {
t = hbaseCluster.addMaster(c, hbaseCluster.getMasters().size(), user);
t.start();
t.waitForServerOnline();
} catch (InterruptedException ie) {
throw new IOException("Interrupted adding master to cluster", ie);
}
@ -382,7 +381,7 @@ public class MiniHBaseCluster {
return true;
}
}
Thread.sleep(200);
Thread.sleep(100);
}
return false;
}

View File

@ -212,7 +212,7 @@ public class TestRegionRebalancing {
// while (!cluster.getMaster().allRegionsAssigned()) {
LOG.debug("Waiting for there to be 22 regions, but there are " + getRegionCount() + " right now.");
try {
Thread.sleep(1000);
Thread.sleep(200);
} catch (InterruptedException e) {}
}
}

View File

@ -23,6 +23,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@ -37,7 +38,7 @@ public class TestCatalogTrackerOnCluster {
/**
* @throws Exception
* @see https://issues.apache.org/jira/browse/HBASE-3445
* @see {https://issues.apache.org/jira/browse/HBASE-3445}
*/
@Test public void testBadOriginalRootLocation() throws Exception {
UTIL.getConfiguration().setInt("ipc.socket.timeout", 3000);
@ -61,9 +62,15 @@ public class TestCatalogTrackerOnCluster {
ServerName nonsense =
new ServerName("example.org", 1234, System.currentTimeMillis());
RootLocationEditor.setRootLocation(zookeeper, nonsense);
// Bring back up the hbase cluster. See if it can deal with nonsense root
// location.
// location. The cluster should start and be fully available.
UTIL.startMiniHBaseCluster(1, 1);
// if we can create a table, it's a good sign that it's working
UTIL.createTable(
getClass().getSimpleName().getBytes(), "family".getBytes());
UTIL.shutdownMiniCluster();
}
}

View File

@ -94,8 +94,11 @@ public class TestRestartCluster {
LOG.info("\n\nCreating tables");
for(byte [] TABLE : TABLES) {
UTIL.createTable(TABLE, FAMILY);
}
for(byte [] TABLE : TABLES) {
UTIL.waitTableAvailable(TABLE, 30000);
}
List<HRegionInfo> allRegions =
MetaScanner.listAllRegions(UTIL.getConfiguration());
assertEquals(3, allRegions.size());

View File

@ -127,7 +127,7 @@ public class TestHLog {
}
@AfterClass
public static void tearDownAfterClass() throws Exception {
TEST_UTIL.shutdownMiniDFSCluster();
TEST_UTIL.shutdownMiniCluster();
}
private static String getName() {

View File

@ -59,7 +59,7 @@ public class TestFSTableDescriptors {
assertTrue(FSTableDescriptors.createTableDescriptor(fs, testdir, htd));
assertFalse(FSTableDescriptors.createTableDescriptor(fs, testdir, htd));
FileStatus [] statuses = fs.listStatus(testdir);
assertTrue(statuses.length == 1);
assertTrue("statuses.length="+statuses.length, statuses.length == 1);
for (int i = 0; i < 10; i++) {
FSTableDescriptors.updateHTableDescriptor(fs, testdir, htd);
}