diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationKillMasterRS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationKillMasterRS.java
new file mode 100644
index 00000000000..ca0fd3172fc
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationKillMasterRS.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication;
+
+import org.apache.hadoop.hbase.LargeTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Runs the {@link TestReplicationKillRS} scenario and selects the RS to kill in the master
+ * cluster. Do not add other tests in this class.
+ */
+@Category(LargeTests.class)
+public class TestReplicationKillMasterRS extends TestReplicationKillRS {
+
+  /**
+   * Loads the table and kills one region server of the cluster behind {@code utility1}.
+   *
+   * @throws Exception if the load, the scans or the wait for replication fail
+   */
+  @Test(timeout=300000)
+  public void killOneMasterRS() throws Exception {
+    loadTableAndKillRS(utility1);
+  }
+
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationKillRS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationKillRS.java
new file mode 100644
index 00000000000..fc882a029ce
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationKillRS.java
@@ -0,0 +1,160 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication;
+
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.impl.Log4JLogger;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.LargeTests;
+import org.apache.hadoop.hbase.UnknownScannerException;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.replication.regionserver.ReplicationSource;
+import org.apache.log4j.Level;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import static org.junit.Assert.fail;
+
+/**
+ * Shared scenario for the replication tests that stop a region server while a table is
+ * being loaded. This class declares no test method of its own; subclasses call
+ * {@link #loadTableAndKillRS(HBaseTestingUtility)} with the cluster to disturb.
+ */
+@Category(LargeTests.class)
+public class TestReplicationKillRS extends TestReplicationBase {
+
+  // Instance initializer: turn on full logging for ReplicationSource.
+  {
+    ((Log4JLogger) ReplicationSource.LOG).getLogger().setLevel(Level.ALL);
+  }
+
+  private static final Log LOG = LogFactory.getLog(TestReplicationKillRS.class);
+
+  /**
+   * Loads one table through {@code utility1} and stops a region server of the given
+   * cluster during the upload, then polls {@code htable2} until it returns at least as
+   * many rows as were read back from {@code htable1}, failing after too many idle polls.
+   * The failover happens internally.
+   *
+   * WARNING this test sometimes fails because of HBASE-3515
+   *
+   * @param util cluster in which one region server is stopped
+   * @throws Exception if loading or scanning a table fails, or if a wait is interrupted
+   */
+  public void loadTableAndKillRS(HBaseTestingUtility util) throws Exception {
+    // killing the RS with .META. can result into failed puts until we solve
+    // IO fencing
+    int rsToKill = util.getHBaseCluster().getServerWithMeta() == 0 ? 1 : 0;
+
+    // Takes about 20 secs to run the full loading, kill around the middle
+    Thread killer = killARegionServer(util, 5000, rsToKill);
+
+    LOG.info("Start loading table");
+    int initialCount = utility1.loadTable(htable1, famName);
+    LOG.info("Done loading table");
+    killer.join(5000);
+    LOG.info("Done waiting for threads");
+
+    Result[] res;
+    // No retry limit here; the callers' @Test timeouts bound the wait.
+    while (true) {
+      try {
+        ResultScanner scanner = htable1.getScanner(new Scan());
+        try {
+          res = scanner.next(initialCount);
+        } finally {
+          // Close on every path so that a failed attempt does not leak its scanner
+          scanner.close();
+        }
+        break;
+      } catch (UnknownScannerException ex) {
+        LOG.info("Cluster wasn't ready yet, restarting scanner");
+      }
+    }
+    // Test we actually have all the rows, we may miss some because we
+    // don't have IO fencing.
+    if (res.length != initialCount) {
+      LOG.warn("We lost some rows on the master cluster!");
+      // We don't really expect the other cluster to have more rows
+      initialCount = res.length;
+    }
+
+    int lastCount = 0;
+
+    final long start = System.currentTimeMillis();
+    int i = 0;
+    while (true) {
+      if (i==NB_RETRIES-1) {
+        fail("Waited too much time for queueFailover replication. " +
+          "Waited "+(System.currentTimeMillis() - start)+"ms.");
+      }
+      Result[] res2;
+      ResultScanner scanner2 = htable2.getScanner(new Scan());
+      try {
+        res2 = scanner2.next(initialCount * 2);
+      } finally {
+        scanner2.close();
+      }
+      if (res2.length >= initialCount) {
+        break;
+      }
+      if (lastCount < res2.length) {
+        i--; // Progress since the last scan: hand back one retry
+      } else {
+        i++;
+      }
+      lastCount = res2.length;
+      LOG.info("Only got " + lastCount + " rows instead of " +
+        initialCount + " current i=" + i);
+      Thread.sleep(SLEEP_TIME*2);
+    }
+  }
+
+  /**
+   * Starts a daemon thread that sleeps for {@code timeout} milliseconds and then stops
+   * one region server of the given cluster.
+   *
+   * @param utility cluster that owns the region server
+   * @param timeout delay before the server is stopped, in milliseconds
+   * @param rs index of the region server to stop
+   * @return the killer thread, already started
+   */
+  private static Thread killARegionServer(final HBaseTestingUtility utility,
+      final long timeout, final int rs) {
+    Thread killer = new Thread() {
+      @Override
+      public void run() {
+        try {
+          Thread.sleep(timeout);
+          utility.getHBaseCluster().getRegionServer(rs).stop("Stopping as part of the test");
+        } catch (Exception e) {
+          // Best effort: report the failure and let the test carry on
+          LOG.error("Couldn't kill a region server", e);
+        }
+      }
+    };
+    killer.setDaemon(true);
+    killer.start();
+    return killer;
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationKillSlaveRS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationKillSlaveRS.java
new file mode 100644
index 00000000000..ae95c4f0535
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationKillSlaveRS.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication;
+
+import org.apache.hadoop.hbase.LargeTests;
+import org.junit.Test;
+import org.junit.Ignore;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Runs the {@link TestReplicationKillRS} scenario and selects the RS to kill in the slave
+ * cluster. Do not add other tests in this class.
+ */
+@Category(LargeTests.class)
+public class TestReplicationKillSlaveRS extends TestReplicationKillRS {
+
+  /**
+   * Loads the table and kills one region server of the cluster behind {@code utility2}.
+   *
+   * @throws Exception if the load, the scans or the wait for replication fail
+   */
+  @Test(timeout=300000)
+  public void killOneSlaveRS() throws Exception {
+    loadTableAndKillRS(utility2);
+  }
+}