HBASE-13127 Add timeouts on all tests so less zombie sightings

This commit is contained in:
stack 2015-08-24 12:49:55 -07:00
parent b85857cf3c
commit d31aedb998
4 changed files with 846 additions and 679 deletions

View File

@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.lang.annotation.Annotation;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.experimental.categories.Category;
import org.junit.internal.runners.statements.FailOnTimeout;
import org.junit.rules.TestRule;
import org.junit.rules.Timeout;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
/**
* Set a test method timeout based off the test categories small, medium, large.
* Based on junit Timeout TestRule; see https://github.com/junit-team/junit/wiki/Rules
*/
public class CategoryBasedTimeout extends Timeout {

  /** @deprecated use {@link #CategoryBasedTimeout(long, TimeUnit)} instead. */
  @Deprecated
  public CategoryBasedTimeout(int millis) {
    super(millis);
  }

  public CategoryBasedTimeout(long timeout, TimeUnit timeUnit) {
    super(timeout, timeUnit);
  }

  protected CategoryBasedTimeout(Builder builder) {
    super(builder);
  }

  /** @return a fresh {@link Builder} that can derive the timeout from a test class. */
  public static Builder builder() {
    return new CategoryBasedTimeout.Builder();
  }

  public static class Builder extends Timeout.Builder {
    /**
     * Picks the timeout from the {@link Category} annotation on {@code clazz}:
     * small = 30 seconds, medium = 180 seconds, large = 10 minutes.
     * Leaves this builder untouched when no recognized category is present.
     */
    public Timeout.Builder withTimeout(Class<?> clazz) {
      // getAnnotation is generic, so we get a typed Category back with no cast.
      Category category = clazz.getAnnotation(Category.class);
      if (category == null) {
        return this;
      }
      for (Class<?> c : category.value()) {
        if (c == SmallTests.class) {
          // See SmallTests. Supposed to run 15 seconds.
          return withTimeout(30, TimeUnit.SECONDS);
        }
        if (c == MediumTests.class) {
          // See MediumTests. Supposed to run 50 seconds.
          return withTimeout(180, TimeUnit.SECONDS);
        }
        if (c == LargeTests.class) {
          // Let large tests have a ten minute timeout.
          return withTimeout(10, TimeUnit.MINUTES);
        }
      }
      return this;
    }
  }
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.Rule;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestRule;
import org.junit.rules.Timeout;
@Category({SmallTests.class})
public class TestTimeout {
  // Timeout is derived from this class's @Category (SmallTests here, so 30 seconds
  // per CategoryBasedTimeout); withLookingForStuckThread(true) asks JUnit to report
  // the stuck thread's stack trace when the timeout fires.
  @Rule public final TestRule timeout = CategoryBasedTimeout.builder()
      .withTimeout(this.getClass())
      .withLookingForStuckThread(true)
      .build();

  /** Fast test: sleeps 100ms, well inside the category timeout, so it must pass. */
  @Test
  public void run1() throws InterruptedException {
    Thread.sleep(100);
  }

  /**
   * Enable to check if timeout works.
   * Can't enable as it waits 30 seconds and expected doesn't do Exception catching
   */
  @Ignore @Test
  public void infiniteLoop() {
    while (true) {}
  }
}

View File

@ -71,7 +71,6 @@ import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.ConnectionUtils;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.NonceGenerator;
import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
@ -222,9 +221,10 @@ public class TestDistributedLogSplitting {
Path rootdir = FSUtils.getRootDir(conf);
installTable(new ZooKeeperWatcher(conf, "table-creation", null),
Table t = installTable(new ZooKeeperWatcher(conf, "table-creation", null),
"table", "family", 40);
TableName table = TableName.valueOf("table");
try {
TableName table = t.getName();
List<HRegionInfo> regions = null;
HRegionServer hrs = null;
for (int i = 0; i < NUM_RS; i++) {
@ -261,7 +261,8 @@ public class TestDistributedLogSplitting {
Path tdir = FSUtils.getTableDir(rootdir, table);
Path editsdir =
WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
WALSplitter.getRegionDirRecoveredEditsDir(
HRegion.getRegionDir(tdir, hri.getEncodedName()));
LOG.debug("checking edits dir " + editsdir);
FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
@Override
@ -272,7 +273,8 @@ public class TestDistributedLogSplitting {
return true;
}
});
assertTrue("edits dir should have more than a single file in it. instead has " + files.length,
assertTrue(
"edits dir should have more than a single file in it. instead has " + files.length,
files.length > 1);
for (int i = 0; i < files.length; i++) {
int c = countWAL(files[i].getPath(), fs, conf);
@ -285,6 +287,9 @@ public class TestDistributedLogSplitting {
assertFalse(fs.exists(logDir));
assertEquals(NUM_LOG_LINES, count);
} finally {
if (t != null) t.close();
}
}
@Test(timeout = 300000)
@ -301,15 +306,17 @@ public class TestDistributedLogSplitting {
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
HRegionServer hrs = findRSToKill(false, "table");
List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
// wait for abort completes
this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
ht.close();
zkw.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
private static class NonceGeneratorWithDups extends PerClientRandomNonceGenerator {
@ -383,8 +390,8 @@ public class TestDistributedLogSplitting {
} finally {
ConnectionUtils.injectNonceGeneratorForTesting((ClusterConnection)
TEST_UTIL.getConnection(), oldNg);
ht.close();
zkw.close();
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
@ -401,14 +408,16 @@ public class TestDistributedLogSplitting {
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
HRegionServer hrs = findRSToKill(true, "table");
List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
ht.close();
zkw.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
private void abortRSAndVerifyRecovery(HRegionServer hrs, Table ht, final ZooKeeperWatcher zkw,
@ -469,7 +478,7 @@ public class TestDistributedLogSplitting {
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
HRegionServer hrs = findRSToKill(false, "table");
List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
@ -506,9 +515,10 @@ public class TestDistributedLogSplitting {
+ HBaseTestingUtility.getAllOnlineRegions(cluster).size());
assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
ht.close();
zkw.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
@Test(timeout = 300000)
@ -526,7 +536,7 @@ public class TestDistributedLogSplitting {
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
HRegionServer hrs = findRSToKill(false, "table");
List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
@ -567,9 +577,10 @@ public class TestDistributedLogSplitting {
+ HBaseTestingUtility.getAllOnlineRegions(cluster).size());
assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
ht.close();
zkw.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
@ -587,7 +598,7 @@ public class TestDistributedLogSplitting {
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
List<HRegionInfo> regions = null;
HRegionServer hrs1 = findRSToKill(false, "table");
regions = ProtobufUtil.getOnlineRegions(hrs1.getRSRpcServices());
@ -651,8 +662,10 @@ public class TestDistributedLogSplitting {
});
assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
ht.close();
zkw.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
@Test(timeout = 300000)
@ -664,6 +677,7 @@ public class TestDistributedLogSplitting {
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = master.getZooKeeper();
Table ht = installTable(zkw, "table", "family", 40);
try {
final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
@ -698,7 +712,10 @@ public class TestDistributedLogSplitting {
return (tmphrs.getRecoveringRegions().size() == 0);
}
});
ht.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
@Test(timeout = 300000)
@ -714,7 +731,7 @@ public class TestDistributedLogSplitting {
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
List<HRegionInfo> regions = null;
HRegionServer hrs = null;
for (int i = 0; i < NUM_RS; i++) {
@ -741,9 +758,10 @@ public class TestDistributedLogSplitting {
assertEquals("Data should remain after reopening of regions", originalCheckSum,
TEST_UTIL.checksumRows(ht));
ht.close();
zkw.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
@Test(timeout = 300000)
@ -758,7 +776,7 @@ public class TestDistributedLogSplitting {
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
try {
// turn off load balancing to prevent regions from moving around otherwise
// they will consume recovered.edits
master.balanceSwitch(false);
@ -852,7 +870,8 @@ public class TestDistributedLogSplitting {
Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf("disableTable"));
for (HRegionInfo hri : regions) {
Path editsdir =
WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
WALSplitter.getRegionDirRecoveredEditsDir(
HRegion.getRegionDir(tdir, hri.getEncodedName()));
LOG.debug("checking edits dir " + editsdir);
if(!fs.exists(editsdir)) continue;
FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
@ -881,12 +900,15 @@ public class TestDistributedLogSplitting {
// clean up
for (HRegionInfo hri : regions) {
Path editsdir =
WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
WALSplitter.getRegionDirRecoveredEditsDir(
HRegion.getRegionDir(tdir, hri.getEncodedName()));
fs.delete(editsdir, true);
}
disablingHT.close();
ht.close();
zkw.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
@Test(timeout = 300000)
@ -904,6 +926,7 @@ public class TestDistributedLogSplitting {
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
@ -959,9 +982,10 @@ public class TestDistributedLogSplitting {
"No RegionInRecoveryException. Following exceptions returned=" + re.getCauses(),
foundRegionInRecoveryException);
}
ht.close();
zkw.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
/**
@ -987,9 +1011,9 @@ public class TestDistributedLogSplitting {
final Path logDir = new Path(rootdir,
DefaultWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
installTable(new ZooKeeperWatcher(conf, "table-creation", null),
Table t = installTable(new ZooKeeperWatcher(conf, "table-creation", null),
"table", "family", 40);
try {
makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()),
"table", "family", NUM_LOG_LINES, 100);
@ -1029,6 +1053,9 @@ public class TestDistributedLogSplitting {
"tot_wkr_task_resigned, tot_wkr_task_err, " +
"tot_wkr_final_transition_failed, tot_wkr_task_done, " +
"tot_wkr_preempt_task");
} finally {
if (t != null) t.close();
}
}
@Test (timeout=300000)
@ -1043,6 +1070,7 @@ public class TestDistributedLogSplitting {
"distributed log splitting test", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
populateDataInTable(NUM_ROWS_PER_REGION, "family");
@ -1081,8 +1109,10 @@ public class TestDistributedLogSplitting {
assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
TEST_UTIL.countRows(ht));
ht.close();
zkw.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
@ -1219,7 +1249,7 @@ public class TestDistributedLogSplitting {
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
List<HRegionInfo> regions = null;
HRegionServer hrs = null;
for (int i = 0; i < NUM_RS; i++) {
@ -1243,7 +1273,8 @@ public class TestDistributedLogSplitting {
while (it.hasNext()) {
HRegionInfo region = it.next();
if (region.isMetaTable()
|| region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
|| region.getEncodedName().equals(
HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
it.remove();
}
}
@ -1291,7 +1322,10 @@ public class TestDistributedLogSplitting {
r = ht.get(g);
theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
assertEquals(value, theStoredVal);
ht.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
@Test(timeout = 300000)
@ -1312,7 +1346,7 @@ public class TestDistributedLogSplitting {
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
try {
List<HRegionInfo> regions = null;
HRegionServer hrs = null;
for (int i = 0; i < NUM_RS; i++) {
@ -1393,7 +1427,10 @@ public class TestDistributedLogSplitting {
r = ht.get(g);
theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
assertEquals(value, theStoredVal);
ht.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
@Test(timeout = 300000)
@ -1402,6 +1439,7 @@ public class TestDistributedLogSplitting {
startCluster(2);
final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", 10);
try {
FileSystem fs = master.getMasterFileSystem().getFileSystem();
Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), TableName.valueOf("table"));
List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
@ -1421,10 +1459,13 @@ public class TestDistributedLogSplitting {
assertEquals(1, files.length);
// verify all seqId files aren't treated as recovered.edits files
NavigableSet<Path> recoveredEdits = WALSplitter.getSplitEditFilesSorted(fs, regionDirs.get(0));
NavigableSet<Path> recoveredEdits =
WALSplitter.getSplitEditFilesSorted(fs, regionDirs.get(0));
assertEquals(0, recoveredEdits.size());
ht.close();
} finally {
if (ht != null) ht.close();
if (zkw != null) zkw.close();
}
}
Table installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs) throws Exception {
@ -1566,7 +1607,8 @@ public class TestDistributedLogSplitting {
// key
byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value));
log.append(htd, curRegionInfo, new HLogKey(curRegionInfo.getEncodedNameAsBytes(), fullTName,
log.append(htd, curRegionInfo,
new HLogKey(curRegionInfo.getEncodedNameAsBytes(), fullTName,
System.currentTimeMillis()), e, sequenceId, true, null);
if (0 == i % syncEvery) {
log.sync();

View File

@ -1234,7 +1234,7 @@
<jersey.version>1.9</jersey.version>
<jmock-junit4.version>2.6.0</jmock-junit4.version>
<jruby.version>1.6.8</jruby.version>
<junit.version>4.11</junit.version>
<junit.version>4.12</junit.version>
<hamcrest.version>1.3</hamcrest.version>
<htrace.version>3.1.0-incubating</htrace.version>
<log4j.version>1.2.17</log4j.version>