HBASE-18357 Enable disabled tests in TestHCM that were disabled by Proc-V2 AM in HBASE-14614
Restore testRegionCaching and testMulti to working state (required fixing move procedure and looking for a new exception). testClusterStatus is broke because multicast is broken.
This commit is contained in:
parent
c5ad801754
commit
e063b231da
|
@ -65,7 +65,8 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
|
|||
}
|
||||
switch (state) {
|
||||
case MOVE_REGION_UNASSIGN:
|
||||
addChildProcedure(new UnassignProcedure(plan.getRegionInfo(), plan.getSource(), true));
|
||||
addChildProcedure(new UnassignProcedure(plan.getRegionInfo(), plan.getSource(),
|
||||
plan.getDestination(), true));
|
||||
setNextState(MoveRegionState.MOVE_REGION_ASSIGN);
|
||||
break;
|
||||
case MOVE_REGION_ASSIGN:
|
||||
|
|
|
@ -0,0 +1,133 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.client;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.CategoryBasedTimeout;
|
||||
import org.apache.hadoop.hbase.Cell;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
|
||||
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
|
||||
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
|
||||
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
|
||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Threads;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.junit.rules.TestName;
|
||||
import org.junit.rules.TestRule;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
/**
|
||||
* Test a drop timeout request.
|
||||
* This test used to be in TestHCM but it has particulare requirements -- i.e. one handler only --
|
||||
* so run it apart from the rest of TestHCM.
|
||||
*/
|
||||
@Category({MediumTests.class})
|
||||
public class TestDropTimeoutRequest {
|
||||
@Rule
|
||||
public final TestRule timeout = CategoryBasedTimeout.builder()
|
||||
.withTimeout(this.getClass())
|
||||
.withLookingForStuckThread(true)
|
||||
.build();
|
||||
@Rule
|
||||
public TestName name = new TestName();
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(TestDropTimeoutRequest.class);
|
||||
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||
private static final byte[] FAM_NAM = Bytes.toBytes("f");
|
||||
private static final int RPC_RETRY = 5;
|
||||
|
||||
/**
|
||||
* Coprocessor that sleeps a while the first time you do a Get
|
||||
*/
|
||||
public static class SleepLongerAtFirstCoprocessor implements RegionCoprocessor, RegionObserver {
|
||||
public static final int SLEEP_TIME = 2000;
|
||||
static final AtomicLong ct = new AtomicLong(0);
|
||||
|
||||
@Override
|
||||
public Optional<RegionObserver> getRegionObserver() {
|
||||
return Optional.of(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void preGetOp(final ObserverContext<RegionCoprocessorEnvironment> e,
|
||||
final Get get, final List<Cell> results) throws IOException {
|
||||
// After first sleep, all requests are timeout except the last retry. If we handle
|
||||
// all the following requests, finally the last request is also timeout. If we drop all
|
||||
// timeout requests, we can handle the last request immediately and it will not timeout.
|
||||
if (ct.incrementAndGet() <= 1) {
|
||||
Threads.sleep(SLEEP_TIME * RPC_RETRY * 2);
|
||||
} else {
|
||||
Threads.sleep(SLEEP_TIME);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
TEST_UTIL.getConfiguration().setBoolean(HConstants.STATUS_PUBLISHED, true);
|
||||
// Up the handlers; this test needs more than usual.
|
||||
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
|
||||
TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, RPC_RETRY);
|
||||
// Simulate queue blocking in testDropTimeoutRequest
|
||||
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HANDLER_COUNT, 1);
|
||||
TEST_UTIL.startMiniCluster(2);
|
||||
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDownAfterClass() throws Exception {
|
||||
TEST_UTIL.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDropTimeoutRequest() throws Exception {
|
||||
// Simulate the situation that the server is slow and client retries for several times because
|
||||
// of timeout. When a request can be handled after waiting in the queue, we will drop it if
|
||||
// it has been considered as timeout at client. If we don't drop it, the server will waste time
|
||||
// on handling timeout requests and finally all requests timeout and client throws exception.
|
||||
TableDescriptorBuilder builder =
|
||||
TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()));
|
||||
builder.addCoprocessor(SleepLongerAtFirstCoprocessor.class.getName());
|
||||
ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(FAM_NAM).build();
|
||||
builder.addColumnFamily(cfd);
|
||||
TableDescriptor td = builder.build();
|
||||
try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
|
||||
admin.createTable(td);
|
||||
}
|
||||
TableBuilder tb = TEST_UTIL.getConnection().getTableBuilder(td.getTableName(), null);
|
||||
tb.setReadRpcTimeout(SleepLongerAtFirstCoprocessor.SLEEP_TIME * 2);
|
||||
tb.setWriteRpcTimeout(SleepLongerAtFirstCoprocessor.SLEEP_TIME * 2);
|
||||
try (Table table = tb.build()) {
|
||||
table.get(new Get(FAM_NAM));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,5 +1,4 @@
|
|||
/*
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
|
@ -218,45 +217,21 @@ public class TestHCM {
|
|||
|
||||
}
|
||||
|
||||
public static class SleepLongerAtFirstCoprocessor implements RegionCoprocessor, RegionObserver {
|
||||
public static final int SLEEP_TIME = 2000;
|
||||
static final AtomicLong ct = new AtomicLong(0);
|
||||
|
||||
@Override
|
||||
public Optional<RegionObserver> getRegionObserver() {
|
||||
return Optional.of(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void preGetOp(final ObserverContext<RegionCoprocessorEnvironment> e,
|
||||
final Get get, final List<Cell> results) throws IOException {
|
||||
// After first sleep, all requests are timeout except the last retry. If we handle
|
||||
// all the following requests, finally the last request is also timeout. If we drop all
|
||||
// timeout requests, we can handle the last request immediately and it will not timeout.
|
||||
if (ct.incrementAndGet() <= 1) {
|
||||
Threads.sleep(SLEEP_TIME * RPC_RETRY * 2);
|
||||
} else {
|
||||
Threads.sleep(SLEEP_TIME);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
TEST_UTIL.getConfiguration().setBoolean(HConstants.STATUS_PUBLISHED, true);
|
||||
// Up the handlers; this test needs more than usual.
|
||||
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
|
||||
TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, RPC_RETRY);
|
||||
// simulate queue blocking in testDropTimeoutRequest
|
||||
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HANDLER_COUNT, 1);
|
||||
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HANDLER_COUNT, 3);
|
||||
TEST_UTIL.startMiniCluster(2);
|
||||
|
||||
}
|
||||
|
||||
@AfterClass public static void tearDownAfterClass() throws Exception {
|
||||
TEST_UTIL.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClusterConnection() throws IOException {
|
||||
ThreadPoolExecutor otherPool = new ThreadPoolExecutor(1, 1,
|
||||
5, TimeUnit.SECONDS,
|
||||
|
@ -341,7 +316,11 @@ public class TestHCM {
|
|||
}
|
||||
|
||||
// Fails too often! Needs work. HBASE-12558
|
||||
// May only fail on non-linux machines? E.g. macosx.
|
||||
@Ignore @Test (expected = RegionServerStoppedException.class)
|
||||
// Depends on mulitcast messaging facility that seems broken in hbase2
|
||||
// See HBASE-19261 "ClusterStatusPublisher where Master could optionally broadcast notice of
|
||||
// dead servers is broke"
|
||||
public void testClusterStatus() throws Exception {
|
||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||
byte[] cf = "cf".getBytes();
|
||||
|
@ -625,21 +604,6 @@ public class TestHCM {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDropTimeoutRequest() throws Exception {
|
||||
// Simulate the situation that the server is slow and client retries for several times because
|
||||
// of timeout. When a request can be handled after waiting in the queue, we will drop it if
|
||||
// it has been considered as timeout at client. If we don't drop it, the server will waste time
|
||||
// on handling timeout requests and finally all requests timeout and client throws exception.
|
||||
HTableDescriptor hdt = TEST_UTIL.createTableDescriptor(TableName.valueOf(name.getMethodName()));
|
||||
hdt.addCoprocessor(SleepLongerAtFirstCoprocessor.class.getName());
|
||||
Configuration c = new Configuration(TEST_UTIL.getConfiguration());
|
||||
try (Table t = TEST_UTIL.createTable(hdt, new byte[][] { FAM_NAM }, c)) {
|
||||
t.setRpcTimeout(SleepLongerAtFirstCoprocessor.SLEEP_TIME * 2);
|
||||
t.get(new Get(FAM_NAM));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test starting from 0 index when RpcRetryingCaller calculate the backoff time.
|
||||
*/
|
||||
|
@ -986,8 +950,8 @@ public class TestHCM {
|
|||
* that we really delete it.
|
||||
* @throws Exception
|
||||
*/
|
||||
@Ignore @Test
|
||||
public void testRegionCaching() throws Exception{
|
||||
@Test
|
||||
public void testRegionCaching() throws Exception {
|
||||
TEST_UTIL.createMultiRegionTable(TABLE_NAME, FAM_NAM).close();
|
||||
Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
|
||||
// test with no retry, or client cache will get updated after the first failure
|
||||
|
@ -999,12 +963,15 @@ public class TestHCM {
|
|||
Put put = new Put(ROW);
|
||||
put.addColumn(FAM_NAM, ROW, ROW);
|
||||
table.put(put);
|
||||
|
||||
ConnectionImplementation conn = (ConnectionImplementation) connection;
|
||||
|
||||
assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
|
||||
|
||||
final int nextPort = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation().getPort() + 1;
|
||||
// Here we mess with the cached location making it so the region at TABLE_NAME, ROW is at
|
||||
// a location where the port is current port number +1 -- i.e. a non-existent location.
|
||||
HRegionLocation loc = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation();
|
||||
final int nextPort = loc.getPort() + 1;
|
||||
conn.updateCachedLocation(loc.getRegionInfo(), loc.getServerName(),
|
||||
ServerName.valueOf("127.0.0.1", nextPort,
|
||||
HConstants.LATEST_TIMESTAMP), HConstants.LATEST_TIMESTAMP);
|
||||
|
@ -1038,7 +1005,7 @@ public class TestHCM {
|
|||
|
||||
// Choose the other server.
|
||||
int curServerId = TEST_UTIL.getHBaseCluster().getServerWith(regionName);
|
||||
int destServerId = (curServerId == 0 ? 1 : 0);
|
||||
int destServerId = curServerId == 0? 1: 0;
|
||||
|
||||
HRegionServer curServer = TEST_UTIL.getHBaseCluster().getRegionServer(curServerId);
|
||||
HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(destServerId);
|
||||
|
@ -1055,7 +1022,7 @@ public class TestHCM {
|
|||
getAssignmentManager().hasRegionsInTransition());
|
||||
|
||||
// Moving. It's possible that we don't have all the regions online at this point, so
|
||||
// the test must depends only on the region we're looking at.
|
||||
// the test must depend only on the region we're looking at.
|
||||
LOG.info("Move starting region="+toMove.getRegionInfo().getRegionNameAsString());
|
||||
TEST_UTIL.getAdmin().move(
|
||||
toMove.getRegionInfo().getEncodedNameAsBytes(),
|
||||
|
@ -1092,7 +1059,7 @@ public class TestHCM {
|
|||
try {
|
||||
table.put(put3);
|
||||
Assert.fail("Unreachable point");
|
||||
} catch (RetriesExhaustedWithDetailsException e){
|
||||
} catch (RetriesExhaustedWithDetailsException e) {
|
||||
LOG.info("Put done, exception caught: " + e.getClass());
|
||||
Assert.assertEquals(1, e.getNumExceptions());
|
||||
Assert.assertEquals(1, e.getCauses().size());
|
||||
|
@ -1102,6 +1069,13 @@ public class TestHCM {
|
|||
Throwable cause = ClientExceptionsUtil.findException(e.getCause(0));
|
||||
Assert.assertNotNull(cause);
|
||||
Assert.assertTrue(cause instanceof RegionMovedException);
|
||||
} catch (RetriesExhaustedException ree) {
|
||||
// hbase2 throws RetriesExhaustedException instead of RetriesExhaustedWithDetailsException
|
||||
// as hbase1 used to do. Keep an eye on this to see if this changed behavior is an issue.
|
||||
LOG.info("Put done, exception caught: " + ree.getClass());
|
||||
Throwable cause = ClientExceptionsUtil.findException(ree.getCause());
|
||||
Assert.assertNotNull(cause);
|
||||
Assert.assertTrue(cause instanceof RegionMovedException);
|
||||
}
|
||||
Assert.assertNotNull("Cached connection is null", conn.getCachedLocation(TABLE_NAME, ROW));
|
||||
Assert.assertEquals(
|
||||
|
@ -1309,12 +1283,11 @@ public class TestHCM {
|
|||
return prevNumRetriesVal;
|
||||
}
|
||||
|
||||
@Ignore @Test
|
||||
@Test
|
||||
public void testMulti() throws Exception {
|
||||
Table table = TEST_UTIL.createMultiRegionTable(TABLE_NAME3, FAM_NAM);
|
||||
try {
|
||||
ConnectionImplementation conn =
|
||||
(ConnectionImplementation)TEST_UTIL.getConnection();
|
||||
ConnectionImplementation conn = (ConnectionImplementation)TEST_UTIL.getConnection();
|
||||
|
||||
// We're now going to move the region and check that it works for the client
|
||||
// First a new put to add the location in the cache
|
||||
|
@ -1345,7 +1318,6 @@ public class TestHCM {
|
|||
|
||||
ServerName destServerName = destServer.getServerName();
|
||||
ServerName metaServerName = TEST_UTIL.getHBaseCluster().getServerHoldingMeta();
|
||||
assertTrue(!destServerName.equals(metaServerName));
|
||||
|
||||
//find another row in the cur server that is less than ROW_X
|
||||
List<HRegion> regions = curServer.getRegions(TABLE_NAME3);
|
||||
|
|
Loading…
Reference in New Issue