HBASE-18357 Enable disabled tests in TestHCM that were disabled by Proc-V2 AM in HBASE-14614
Restore testRegionCaching and testMulti to working state (required fixing move procedure and looking for a new exception). testClusterStatus is broke because multicast is broken.
This commit is contained in:
parent
c5ad801754
commit
e063b231da
|
@ -65,7 +65,8 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
|
||||||
}
|
}
|
||||||
switch (state) {
|
switch (state) {
|
||||||
case MOVE_REGION_UNASSIGN:
|
case MOVE_REGION_UNASSIGN:
|
||||||
addChildProcedure(new UnassignProcedure(plan.getRegionInfo(), plan.getSource(), true));
|
addChildProcedure(new UnassignProcedure(plan.getRegionInfo(), plan.getSource(),
|
||||||
|
plan.getDestination(), true));
|
||||||
setNextState(MoveRegionState.MOVE_REGION_ASSIGN);
|
setNextState(MoveRegionState.MOVE_REGION_ASSIGN);
|
||||||
break;
|
break;
|
||||||
case MOVE_REGION_ASSIGN:
|
case MOVE_REGION_ASSIGN:
|
||||||
|
|
|
@ -0,0 +1,133 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.client;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.hbase.CategoryBasedTimeout;
|
||||||
|
import org.apache.hadoop.hbase.Cell;
|
||||||
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.hadoop.hbase.util.Threads;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
import org.junit.rules.TestName;
|
||||||
|
import org.junit.rules.TestRule;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test a drop timeout request.
|
||||||
|
* This test used to be in TestHCM but it has particulare requirements -- i.e. one handler only --
|
||||||
|
* so run it apart from the rest of TestHCM.
|
||||||
|
*/
|
||||||
|
@Category({MediumTests.class})
|
||||||
|
public class TestDropTimeoutRequest {
|
||||||
|
@Rule
|
||||||
|
public final TestRule timeout = CategoryBasedTimeout.builder()
|
||||||
|
.withTimeout(this.getClass())
|
||||||
|
.withLookingForStuckThread(true)
|
||||||
|
.build();
|
||||||
|
@Rule
|
||||||
|
public TestName name = new TestName();
|
||||||
|
|
||||||
|
private static final Log LOG = LogFactory.getLog(TestDropTimeoutRequest.class);
|
||||||
|
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||||
|
private static final byte[] FAM_NAM = Bytes.toBytes("f");
|
||||||
|
private static final int RPC_RETRY = 5;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Coprocessor that sleeps a while the first time you do a Get
|
||||||
|
*/
|
||||||
|
public static class SleepLongerAtFirstCoprocessor implements RegionCoprocessor, RegionObserver {
|
||||||
|
public static final int SLEEP_TIME = 2000;
|
||||||
|
static final AtomicLong ct = new AtomicLong(0);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<RegionObserver> getRegionObserver() {
|
||||||
|
return Optional.of(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void preGetOp(final ObserverContext<RegionCoprocessorEnvironment> e,
|
||||||
|
final Get get, final List<Cell> results) throws IOException {
|
||||||
|
// After first sleep, all requests are timeout except the last retry. If we handle
|
||||||
|
// all the following requests, finally the last request is also timeout. If we drop all
|
||||||
|
// timeout requests, we can handle the last request immediately and it will not timeout.
|
||||||
|
if (ct.incrementAndGet() <= 1) {
|
||||||
|
Threads.sleep(SLEEP_TIME * RPC_RETRY * 2);
|
||||||
|
} else {
|
||||||
|
Threads.sleep(SLEEP_TIME);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpBeforeClass() throws Exception {
|
||||||
|
TEST_UTIL.getConfiguration().setBoolean(HConstants.STATUS_PUBLISHED, true);
|
||||||
|
// Up the handlers; this test needs more than usual.
|
||||||
|
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
|
||||||
|
TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, RPC_RETRY);
|
||||||
|
// Simulate queue blocking in testDropTimeoutRequest
|
||||||
|
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HANDLER_COUNT, 1);
|
||||||
|
TEST_UTIL.startMiniCluster(2);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDownAfterClass() throws Exception {
|
||||||
|
TEST_UTIL.shutdownMiniCluster();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDropTimeoutRequest() throws Exception {
|
||||||
|
// Simulate the situation that the server is slow and client retries for several times because
|
||||||
|
// of timeout. When a request can be handled after waiting in the queue, we will drop it if
|
||||||
|
// it has been considered as timeout at client. If we don't drop it, the server will waste time
|
||||||
|
// on handling timeout requests and finally all requests timeout and client throws exception.
|
||||||
|
TableDescriptorBuilder builder =
|
||||||
|
TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()));
|
||||||
|
builder.addCoprocessor(SleepLongerAtFirstCoprocessor.class.getName());
|
||||||
|
ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(FAM_NAM).build();
|
||||||
|
builder.addColumnFamily(cfd);
|
||||||
|
TableDescriptor td = builder.build();
|
||||||
|
try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
|
||||||
|
admin.createTable(td);
|
||||||
|
}
|
||||||
|
TableBuilder tb = TEST_UTIL.getConnection().getTableBuilder(td.getTableName(), null);
|
||||||
|
tb.setReadRpcTimeout(SleepLongerAtFirstCoprocessor.SLEEP_TIME * 2);
|
||||||
|
tb.setWriteRpcTimeout(SleepLongerAtFirstCoprocessor.SLEEP_TIME * 2);
|
||||||
|
try (Table table = tb.build()) {
|
||||||
|
table.get(new Get(FAM_NAM));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,5 +1,4 @@
|
||||||
/*
|
/*
|
||||||
*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -218,45 +217,21 @@ public class TestHCM {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class SleepLongerAtFirstCoprocessor implements RegionCoprocessor, RegionObserver {
|
|
||||||
public static final int SLEEP_TIME = 2000;
|
|
||||||
static final AtomicLong ct = new AtomicLong(0);
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Optional<RegionObserver> getRegionObserver() {
|
|
||||||
return Optional.of(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void preGetOp(final ObserverContext<RegionCoprocessorEnvironment> e,
|
|
||||||
final Get get, final List<Cell> results) throws IOException {
|
|
||||||
// After first sleep, all requests are timeout except the last retry. If we handle
|
|
||||||
// all the following requests, finally the last request is also timeout. If we drop all
|
|
||||||
// timeout requests, we can handle the last request immediately and it will not timeout.
|
|
||||||
if (ct.incrementAndGet() <= 1) {
|
|
||||||
Threads.sleep(SLEEP_TIME * RPC_RETRY * 2);
|
|
||||||
} else {
|
|
||||||
Threads.sleep(SLEEP_TIME);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void setUpBeforeClass() throws Exception {
|
public static void setUpBeforeClass() throws Exception {
|
||||||
TEST_UTIL.getConfiguration().setBoolean(HConstants.STATUS_PUBLISHED, true);
|
TEST_UTIL.getConfiguration().setBoolean(HConstants.STATUS_PUBLISHED, true);
|
||||||
// Up the handlers; this test needs more than usual.
|
// Up the handlers; this test needs more than usual.
|
||||||
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
|
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
|
||||||
TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, RPC_RETRY);
|
TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, RPC_RETRY);
|
||||||
// simulate queue blocking in testDropTimeoutRequest
|
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HANDLER_COUNT, 3);
|
||||||
TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HANDLER_COUNT, 1);
|
|
||||||
TEST_UTIL.startMiniCluster(2);
|
TEST_UTIL.startMiniCluster(2);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterClass public static void tearDownAfterClass() throws Exception {
|
@AfterClass public static void tearDownAfterClass() throws Exception {
|
||||||
TEST_UTIL.shutdownMiniCluster();
|
TEST_UTIL.shutdownMiniCluster();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testClusterConnection() throws IOException {
|
public void testClusterConnection() throws IOException {
|
||||||
ThreadPoolExecutor otherPool = new ThreadPoolExecutor(1, 1,
|
ThreadPoolExecutor otherPool = new ThreadPoolExecutor(1, 1,
|
||||||
5, TimeUnit.SECONDS,
|
5, TimeUnit.SECONDS,
|
||||||
|
@ -341,7 +316,11 @@ public class TestHCM {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fails too often! Needs work. HBASE-12558
|
// Fails too often! Needs work. HBASE-12558
|
||||||
|
// May only fail on non-linux machines? E.g. macosx.
|
||||||
@Ignore @Test (expected = RegionServerStoppedException.class)
|
@Ignore @Test (expected = RegionServerStoppedException.class)
|
||||||
|
// Depends on mulitcast messaging facility that seems broken in hbase2
|
||||||
|
// See HBASE-19261 "ClusterStatusPublisher where Master could optionally broadcast notice of
|
||||||
|
// dead servers is broke"
|
||||||
public void testClusterStatus() throws Exception {
|
public void testClusterStatus() throws Exception {
|
||||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
byte[] cf = "cf".getBytes();
|
byte[] cf = "cf".getBytes();
|
||||||
|
@ -625,21 +604,6 @@ public class TestHCM {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testDropTimeoutRequest() throws Exception {
|
|
||||||
// Simulate the situation that the server is slow and client retries for several times because
|
|
||||||
// of timeout. When a request can be handled after waiting in the queue, we will drop it if
|
|
||||||
// it has been considered as timeout at client. If we don't drop it, the server will waste time
|
|
||||||
// on handling timeout requests and finally all requests timeout and client throws exception.
|
|
||||||
HTableDescriptor hdt = TEST_UTIL.createTableDescriptor(TableName.valueOf(name.getMethodName()));
|
|
||||||
hdt.addCoprocessor(SleepLongerAtFirstCoprocessor.class.getName());
|
|
||||||
Configuration c = new Configuration(TEST_UTIL.getConfiguration());
|
|
||||||
try (Table t = TEST_UTIL.createTable(hdt, new byte[][] { FAM_NAM }, c)) {
|
|
||||||
t.setRpcTimeout(SleepLongerAtFirstCoprocessor.SLEEP_TIME * 2);
|
|
||||||
t.get(new Get(FAM_NAM));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test starting from 0 index when RpcRetryingCaller calculate the backoff time.
|
* Test starting from 0 index when RpcRetryingCaller calculate the backoff time.
|
||||||
*/
|
*/
|
||||||
|
@ -986,7 +950,7 @@ public class TestHCM {
|
||||||
* that we really delete it.
|
* that we really delete it.
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
*/
|
*/
|
||||||
@Ignore @Test
|
@Test
|
||||||
public void testRegionCaching() throws Exception {
|
public void testRegionCaching() throws Exception {
|
||||||
TEST_UTIL.createMultiRegionTable(TABLE_NAME, FAM_NAM).close();
|
TEST_UTIL.createMultiRegionTable(TABLE_NAME, FAM_NAM).close();
|
||||||
Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
|
Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
|
||||||
|
@ -999,12 +963,15 @@ public class TestHCM {
|
||||||
Put put = new Put(ROW);
|
Put put = new Put(ROW);
|
||||||
put.addColumn(FAM_NAM, ROW, ROW);
|
put.addColumn(FAM_NAM, ROW, ROW);
|
||||||
table.put(put);
|
table.put(put);
|
||||||
|
|
||||||
ConnectionImplementation conn = (ConnectionImplementation) connection;
|
ConnectionImplementation conn = (ConnectionImplementation) connection;
|
||||||
|
|
||||||
assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
|
assertNotNull(conn.getCachedLocation(TABLE_NAME, ROW));
|
||||||
|
|
||||||
final int nextPort = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation().getPort() + 1;
|
// Here we mess with the cached location making it so the region at TABLE_NAME, ROW is at
|
||||||
|
// a location where the port is current port number +1 -- i.e. a non-existent location.
|
||||||
HRegionLocation loc = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation();
|
HRegionLocation loc = conn.getCachedLocation(TABLE_NAME, ROW).getRegionLocation();
|
||||||
|
final int nextPort = loc.getPort() + 1;
|
||||||
conn.updateCachedLocation(loc.getRegionInfo(), loc.getServerName(),
|
conn.updateCachedLocation(loc.getRegionInfo(), loc.getServerName(),
|
||||||
ServerName.valueOf("127.0.0.1", nextPort,
|
ServerName.valueOf("127.0.0.1", nextPort,
|
||||||
HConstants.LATEST_TIMESTAMP), HConstants.LATEST_TIMESTAMP);
|
HConstants.LATEST_TIMESTAMP), HConstants.LATEST_TIMESTAMP);
|
||||||
|
@ -1038,7 +1005,7 @@ public class TestHCM {
|
||||||
|
|
||||||
// Choose the other server.
|
// Choose the other server.
|
||||||
int curServerId = TEST_UTIL.getHBaseCluster().getServerWith(regionName);
|
int curServerId = TEST_UTIL.getHBaseCluster().getServerWith(regionName);
|
||||||
int destServerId = (curServerId == 0 ? 1 : 0);
|
int destServerId = curServerId == 0? 1: 0;
|
||||||
|
|
||||||
HRegionServer curServer = TEST_UTIL.getHBaseCluster().getRegionServer(curServerId);
|
HRegionServer curServer = TEST_UTIL.getHBaseCluster().getRegionServer(curServerId);
|
||||||
HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(destServerId);
|
HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(destServerId);
|
||||||
|
@ -1055,7 +1022,7 @@ public class TestHCM {
|
||||||
getAssignmentManager().hasRegionsInTransition());
|
getAssignmentManager().hasRegionsInTransition());
|
||||||
|
|
||||||
// Moving. It's possible that we don't have all the regions online at this point, so
|
// Moving. It's possible that we don't have all the regions online at this point, so
|
||||||
// the test must depends only on the region we're looking at.
|
// the test must depend only on the region we're looking at.
|
||||||
LOG.info("Move starting region="+toMove.getRegionInfo().getRegionNameAsString());
|
LOG.info("Move starting region="+toMove.getRegionInfo().getRegionNameAsString());
|
||||||
TEST_UTIL.getAdmin().move(
|
TEST_UTIL.getAdmin().move(
|
||||||
toMove.getRegionInfo().getEncodedNameAsBytes(),
|
toMove.getRegionInfo().getEncodedNameAsBytes(),
|
||||||
|
@ -1102,6 +1069,13 @@ public class TestHCM {
|
||||||
Throwable cause = ClientExceptionsUtil.findException(e.getCause(0));
|
Throwable cause = ClientExceptionsUtil.findException(e.getCause(0));
|
||||||
Assert.assertNotNull(cause);
|
Assert.assertNotNull(cause);
|
||||||
Assert.assertTrue(cause instanceof RegionMovedException);
|
Assert.assertTrue(cause instanceof RegionMovedException);
|
||||||
|
} catch (RetriesExhaustedException ree) {
|
||||||
|
// hbase2 throws RetriesExhaustedException instead of RetriesExhaustedWithDetailsException
|
||||||
|
// as hbase1 used to do. Keep an eye on this to see if this changed behavior is an issue.
|
||||||
|
LOG.info("Put done, exception caught: " + ree.getClass());
|
||||||
|
Throwable cause = ClientExceptionsUtil.findException(ree.getCause());
|
||||||
|
Assert.assertNotNull(cause);
|
||||||
|
Assert.assertTrue(cause instanceof RegionMovedException);
|
||||||
}
|
}
|
||||||
Assert.assertNotNull("Cached connection is null", conn.getCachedLocation(TABLE_NAME, ROW));
|
Assert.assertNotNull("Cached connection is null", conn.getCachedLocation(TABLE_NAME, ROW));
|
||||||
Assert.assertEquals(
|
Assert.assertEquals(
|
||||||
|
@ -1309,12 +1283,11 @@ public class TestHCM {
|
||||||
return prevNumRetriesVal;
|
return prevNumRetriesVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ignore @Test
|
@Test
|
||||||
public void testMulti() throws Exception {
|
public void testMulti() throws Exception {
|
||||||
Table table = TEST_UTIL.createMultiRegionTable(TABLE_NAME3, FAM_NAM);
|
Table table = TEST_UTIL.createMultiRegionTable(TABLE_NAME3, FAM_NAM);
|
||||||
try {
|
try {
|
||||||
ConnectionImplementation conn =
|
ConnectionImplementation conn = (ConnectionImplementation)TEST_UTIL.getConnection();
|
||||||
(ConnectionImplementation)TEST_UTIL.getConnection();
|
|
||||||
|
|
||||||
// We're now going to move the region and check that it works for the client
|
// We're now going to move the region and check that it works for the client
|
||||||
// First a new put to add the location in the cache
|
// First a new put to add the location in the cache
|
||||||
|
@ -1345,7 +1318,6 @@ public class TestHCM {
|
||||||
|
|
||||||
ServerName destServerName = destServer.getServerName();
|
ServerName destServerName = destServer.getServerName();
|
||||||
ServerName metaServerName = TEST_UTIL.getHBaseCluster().getServerHoldingMeta();
|
ServerName metaServerName = TEST_UTIL.getHBaseCluster().getServerHoldingMeta();
|
||||||
assertTrue(!destServerName.equals(metaServerName));
|
|
||||||
|
|
||||||
//find another row in the cur server that is less than ROW_X
|
//find another row in the cur server that is less than ROW_X
|
||||||
List<HRegion> regions = curServer.getRegions(TABLE_NAME3);
|
List<HRegion> regions = curServer.getRegions(TABLE_NAME3);
|
||||||
|
|
Loading…
Reference in New Issue