HBASE-10268. TestSplitLogWorker occasionally fails

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1556916 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Kyle Purtell 2014-01-09 18:50:52 +00:00
parent 3cc835b7e1
commit d39389e2e3
1 changed files with 28 additions and 26 deletions

View File

@ -54,6 +54,7 @@ import org.junit.experimental.categories.Category;
@Category(MediumTests.class)
public class TestSplitLogWorker {
private static final Log LOG = LogFactory.getLog(TestSplitLogWorker.class);
private static final int WAIT_TIME = 15000;
private final ServerName MANAGER = ServerName.valueOf("manager,1,1");
static {
Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);
@ -140,7 +141,7 @@ public class TestSplitLogWorker {
};
@Test
@Test(timeout=60000)
public void testAcquireTaskAtStartup() throws Exception {
LOG.info("testAcquireTaskAtStartup");
SplitLogCounters.resetCounters();
@ -155,7 +156,7 @@ public class TestSplitLogWorker {
new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), mockedRS, neverEndingTask);
slw.start();
try {
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, WAIT_TIME);
byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS));
SplitLogTask slt = SplitLogTask.parseFrom(bytes);
assertTrue(slt.isOwned(RS));
@ -168,14 +169,14 @@ public class TestSplitLogWorker {
throws InterruptedException {
if (slw != null) {
slw.stop();
slw.worker.join(3000);
slw.worker.join(WAIT_TIME);
if (slw.worker.isAlive()) {
assertTrue(("Could not stop the worker thread slw=" + slw) == null);
}
}
}
@Test
@Test(timeout=60000)
public void testRaceForTask() throws Exception {
LOG.info("testRaceForTask");
SplitLogCounters.resetCounters();
@ -194,11 +195,12 @@ public class TestSplitLogWorker {
slw1.start();
slw2.start();
try {
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, WAIT_TIME);
// Assert that either the tot_wkr_failed_to_grab_task_owned count was set of if
// not it, that we fell through to the next counter in line and it was set.
assertTrue(waitForCounterBoolean(SplitLogCounters.tot_wkr_failed_to_grab_task_owned, 0, 1, 1500, false) ||
SplitLogCounters.tot_wkr_failed_to_grab_task_lost_race.get() == 1);
assertTrue(waitForCounterBoolean(SplitLogCounters.tot_wkr_failed_to_grab_task_owned, 0, 1,
WAIT_TIME, false) ||
SplitLogCounters.tot_wkr_failed_to_grab_task_lost_race.get() == 1);
byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TRFT));
SplitLogTask slt = SplitLogTask.parseFrom(bytes);
assertTrue(slt.isOwned(SVR1) || slt.isOwned(SVR2));
@ -208,7 +210,7 @@ public class TestSplitLogWorker {
}
}
@Test
@Test(timeout=60000)
public void testPreemptTask() throws Exception {
LOG.info("testPreemptTask");
SplitLogCounters.resetCounters();
@ -221,27 +223,27 @@ public class TestSplitLogWorker {
try {
Thread.yield(); // let the worker start
Thread.sleep(1000);
waitForCounter(SplitLogCounters.tot_wkr_task_grabing, 0, 1, 5000);
waitForCounter(SplitLogCounters.tot_wkr_task_grabing, 0, 1, WAIT_TIME);
// this time create a task node after starting the splitLogWorker
zkw.getRecoverableZooKeeper().create(PATH,
new SplitLogTask.Unassigned(MANAGER).toByteArray(), Ids.OPEN_ACL_UNSAFE,
CreateMode.PERSISTENT);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 8000);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, WAIT_TIME);
assertEquals(1, slw.taskReadySeq);
byte [] bytes = ZKUtil.getData(zkw, PATH);
SplitLogTask slt = SplitLogTask.parseFrom(bytes);
assertTrue(slt.isOwned(SRV));
slt = new SplitLogTask.Unassigned(MANAGER);
ZKUtil.setData(zkw, PATH, slt.toByteArray());
waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 5000);
waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, WAIT_TIME);
} finally {
stopSplitLogWorker(slw);
}
}
@Test
@Test(timeout=60000)
public void testMultipleTasks() throws Exception {
LOG.info("testMultipleTasks");
SplitLogCounters.resetCounters();
@ -254,13 +256,13 @@ public class TestSplitLogWorker {
try {
Thread.yield(); // let the worker start
Thread.sleep(100);
waitForCounter(SplitLogCounters.tot_wkr_task_grabing, 0, 1, 5000);
waitForCounter(SplitLogCounters.tot_wkr_task_grabing, 0, 1, WAIT_TIME);
SplitLogTask unassignedManager = new SplitLogTask.Unassigned(MANAGER);
zkw.getRecoverableZooKeeper().create(PATH1, unassignedManager.toByteArray(),
Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 5000);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, WAIT_TIME);
// now the worker is busy doing the above task
// create another task
@ -272,9 +274,9 @@ public class TestSplitLogWorker {
final ServerName anotherWorker = ServerName.valueOf("another-worker,1,1");
SplitLogTask slt = new SplitLogTask.Owned(anotherWorker);
ZKUtil.setData(zkw, PATH1, slt.toByteArray());
waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 5000);
waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, WAIT_TIME);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 1, 2, 5000);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 1, 2, WAIT_TIME);
assertEquals(2, slw.taskReadySeq);
byte [] bytes = ZKUtil.getData(zkw, PATH2);
slt = SplitLogTask.parseFrom(bytes);
@ -284,7 +286,7 @@ public class TestSplitLogWorker {
}
}
@Test
@Test(timeout=60000)
public void testRescan() throws Exception {
LOG.info("testRescan");
SplitLogCounters.resetCounters();
@ -300,25 +302,25 @@ public class TestSplitLogWorker {
zkw.getRecoverableZooKeeper().create(task,slt.toByteArray(), Ids.OPEN_ACL_UNSAFE,
CreateMode.PERSISTENT);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, WAIT_TIME);
// now the worker is busy doing the above task
// preempt the task, have it owned by another worker
ZKUtil.setData(zkw, task, slt.toByteArray());
waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, 1500);
waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 0, 1, WAIT_TIME);
// create a RESCAN node
String rescan = ZKSplitLog.getEncodedNodeName(zkw, "RESCAN");
rescan = zkw.getRecoverableZooKeeper().create(rescan, slt.toByteArray(), Ids.OPEN_ACL_UNSAFE,
CreateMode.PERSISTENT_SEQUENTIAL);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 1, 2, 1500);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 1, 2, WAIT_TIME);
// RESCAN node might not have been processed if the worker became busy
// with the above task. preempt the task again so that now the RESCAN
// node is processed
ZKUtil.setData(zkw, task, slt.toByteArray());
waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 1, 2, 1500);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired_rescan, 0, 1, 1500);
waitForCounter(SplitLogCounters.tot_wkr_preempt_task, 1, 2, WAIT_TIME);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired_rescan, 0, 1, WAIT_TIME);
List<String> nodes = ZKUtil.listChildrenNoWatch(zkw, zkw.splitLogZNode);
LOG.debug(nodes);
@ -336,7 +338,7 @@ public class TestSplitLogWorker {
assertEquals(2, num);
}
@Test
@Test(timeout=60000)
public void testAcquireMultiTasks() throws Exception {
LOG.info("testAcquireMultiTasks");
SplitLogCounters.resetCounters();
@ -356,7 +358,7 @@ public class TestSplitLogWorker {
SplitLogWorker slw = new SplitLogWorker(zkw, testConf, mockedRS, neverEndingTask);
slw.start();
try {
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, maxTasks, 6000);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, maxTasks, WAIT_TIME);
for (int i = 0; i < maxTasks; i++) {
byte[] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS + i));
SplitLogTask slt = SplitLogTask.parseFrom(bytes);
@ -372,7 +374,7 @@ public class TestSplitLogWorker {
* RS
* @throws Exception
*/
@Test
@Test(timeout=60000)
public void testAcquireMultiTasksByAvgTasksPerRS() throws Exception {
LOG.info("testAcquireMultiTasks");
SplitLogCounters.resetCounters();
@ -401,7 +403,7 @@ public class TestSplitLogWorker {
slw.start();
try {
int acquiredTasks = 0;
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 2, 6000);
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 2, WAIT_TIME);
for (int i = 0; i < maxTasks; i++) {
byte[] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS + i));
SplitLogTask slt = SplitLogTask.parseFrom(bytes);