HBASE-2691 LeaseStillHeldException totally ignored by RS, wrongly named
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@952869 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dc641719fa
commit
d3fb45f189
|
@ -379,6 +379,7 @@ Release 0.21.0 - Unreleased
|
|||
HBASE-2676 TestInfoServers should use ephemeral ports
|
||||
HBASE-2616 TestHRegion.testWritesWhileGetting flaky on trunk
|
||||
HBASE-2684 TestMasterWrongRS flaky in trunk
|
||||
HBASE-2691 LeaseStillHeldException totally ignored by RS, wrongly named
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-1760 Cleanup TODOs in HTable
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
/**
|
||||
* Copyright 2010 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* This exception is thrown by the master when a region server was shut down
|
||||
* and restarted so fast that the master still hasn't processed the server
|
||||
* shutdown of the first instance.
|
||||
*/
|
||||
public class PleaseHoldException extends IOException {
|
||||
|
||||
public PleaseHoldException(String message) {
|
||||
super(message);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
/**
|
||||
* Copyright 2010 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* This exception is thrown by the master when a region server reports and is
|
||||
* already being processed as dead. This can happen when a region server loses
|
||||
* its session but hasn't figured it out yet.
|
||||
*/
|
||||
public class YouAreDeadException extends IOException {
|
||||
|
||||
public YouAreDeadException(String message) {
|
||||
super(message);
|
||||
}
|
||||
}
|
|
@ -679,11 +679,13 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
|
||||
/**
|
||||
* Override if you'd like to add messages to return to regionserver <code>hsi</code>
|
||||
* @param messages Messages to add to
|
||||
* or to send an exception.
|
||||
* @param msgs Messages to add to
|
||||
* @return Messages to return to
|
||||
* @throws IOException exceptions that were injected for the region servers
|
||||
*/
|
||||
protected HMsg [] adornRegionServerAnswer(final HServerInfo hsi,
|
||||
final HMsg [] msgs) {
|
||||
final HMsg [] msgs) throws IOException {
|
||||
return msgs;
|
||||
}
|
||||
|
||||
|
|
|
@ -30,8 +30,8 @@ import org.apache.hadoop.hbase.HRegionLocation;
|
|||
import org.apache.hadoop.hbase.HServerAddress;
|
||||
import org.apache.hadoop.hbase.HServerInfo;
|
||||
import org.apache.hadoop.hbase.HServerLoad;
|
||||
import org.apache.hadoop.hbase.Leases;
|
||||
import org.apache.hadoop.hbase.Leases.LeaseStillHeldException;
|
||||
import org.apache.hadoop.hbase.PleaseHoldException;
|
||||
import org.apache.hadoop.hbase.YouAreDeadException;
|
||||
import org.apache.hadoop.hbase.client.Get;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
|
@ -177,13 +177,14 @@ public class ServerManager implements HConstants {
|
|||
String hostAndPort = info.getServerAddress().toString();
|
||||
HServerInfo existingServer = haveServerWithSameHostAndPortAlready(info.getHostnamePort());
|
||||
if (existingServer != null) {
|
||||
LOG.info("Server start rejected; we already have " + hostAndPort +
|
||||
" registered; existingServer=" + existingServer + ", newServer=" + info);
|
||||
String message = "Server start rejected; we already have " + hostAndPort +
|
||||
" registered; existingServer=" + existingServer + ", newServer=" + info;
|
||||
LOG.info(message);
|
||||
if (existingServer.getStartCode() < info.getStartCode()) {
|
||||
LOG.info("Triggering server recovery; existingServer looks stale");
|
||||
expireServer(existingServer);
|
||||
}
|
||||
throw new Leases.LeaseStillHeldException(hostAndPort);
|
||||
throw new PleaseHoldException(message);
|
||||
}
|
||||
checkIsDead(info.getServerName(), "STARTUP");
|
||||
LOG.info("Received start message from: " + info.getServerName());
|
||||
|
@ -208,11 +209,12 @@ public class ServerManager implements HConstants {
|
|||
* @throws YouAreDeadException if the server is currently being processed as dead
|
||||
*/
|
||||
private void checkIsDead(final String serverName, final String what)
|
||||
throws LeaseStillHeldException {
|
||||
throws YouAreDeadException {
|
||||
if (!isDead(serverName)) return;
|
||||
LOG.debug("Server " + what + " rejected; currently processing " +
|
||||
serverName + " as dead server");
|
||||
throw new Leases.LeaseStillHeldException(serverName);
|
||||
String message = "Server " + what + " rejected; currently processing " +
|
||||
serverName + " as dead server";
|
||||
LOG.debug(message);
|
||||
throw new YouAreDeadException(message);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -41,9 +41,11 @@ import org.apache.hadoop.hbase.Leases;
|
|||
import org.apache.hadoop.hbase.Leases.LeaseStillHeldException;
|
||||
import org.apache.hadoop.hbase.LocalHBaseCluster;
|
||||
import org.apache.hadoop.hbase.NotServingRegionException;
|
||||
import org.apache.hadoop.hbase.PleaseHoldException;
|
||||
import org.apache.hadoop.hbase.RemoteExceptionHandler;
|
||||
import org.apache.hadoop.hbase.UnknownRowLockException;
|
||||
import org.apache.hadoop.hbase.UnknownScannerException;
|
||||
import org.apache.hadoop.hbase.YouAreDeadException;
|
||||
import org.apache.hadoop.hbase.client.Delete;
|
||||
import org.apache.hadoop.hbase.client.Get;
|
||||
import org.apache.hadoop.hbase.client.MultiPut;
|
||||
|
@ -524,9 +526,15 @@ public class HRegionServer implements HConstants, HRegionInterface,
|
|||
continue;
|
||||
}
|
||||
} catch (Exception e) { // FindBugs REC_CATCH_EXCEPTION
|
||||
// Two special exceptions could be printed out here,
|
||||
// PleaseHoldException and YouAreDeadException
|
||||
if (e instanceof IOException) {
|
||||
e = RemoteExceptionHandler.checkIOException((IOException) e);
|
||||
}
|
||||
if (e instanceof YouAreDeadException) {
|
||||
// This will be caught and handled as a fatal error below
|
||||
throw e;
|
||||
}
|
||||
tries++;
|
||||
if (tries > 0 && (tries % this.numRetries) == 0) {
|
||||
// Check filesystem every so often.
|
||||
|
|
|
@ -811,4 +811,20 @@ public class HBaseTestingUtility {
|
|||
Thread.sleep(500);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Make sure that at least the specified number of region servers
|
||||
* are running
|
||||
* @param num minimum number of region servers that should be running
|
||||
* @throws IOException
|
||||
*/
|
||||
public void ensureSomeRegionServersAvailable(final int num)
|
||||
throws IOException {
|
||||
if (this.getHBaseCluster().getLiveRegionServerThreads().size() < num) {
|
||||
// Need at least "num" servers.
|
||||
LOG.info("Started new server=" +
|
||||
this.getHBaseCluster().startRegionServer());
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -78,6 +78,9 @@ public class MiniHBaseCluster implements HConstants {
|
|||
private final Map<HServerInfo, List<HMsg>> messages =
|
||||
new ConcurrentHashMap<HServerInfo, List<HMsg>>();
|
||||
|
||||
private final Map<HServerInfo, IOException> exceptions =
|
||||
new ConcurrentHashMap<HServerInfo, IOException>();
|
||||
|
||||
public MiniHBaseClusterMaster(final Configuration conf)
|
||||
throws IOException {
|
||||
super(conf);
|
||||
|
@ -99,9 +102,26 @@ public class MiniHBaseCluster implements HConstants {
|
|||
}
|
||||
}
|
||||
|
||||
void addException(final HServerInfo hsi, final IOException ex) {
|
||||
this.exceptions.put(hsi, ex);
|
||||
}
|
||||
|
||||
/**
|
||||
* This implementation is special, exceptions will be treated first and
|
||||
* messages won't be sent back to the region servers even if some are
|
||||
* specified.
|
||||
* @param hsi the rs
|
||||
* @param msgs Messages to add to
|
||||
* @return
|
||||
* @throws IOException will be thrown if one was added for this region server
|
||||
*/
|
||||
@Override
|
||||
protected HMsg[] adornRegionServerAnswer(final HServerInfo hsi,
|
||||
final HMsg[] msgs) {
|
||||
final HMsg[] msgs) throws IOException {
|
||||
IOException ex = this.exceptions.remove(hsi);
|
||||
if (ex != null) {
|
||||
throw ex;
|
||||
}
|
||||
HMsg [] answerMsgs = msgs;
|
||||
synchronized (this.messages) {
|
||||
List<HMsg> hmsgs = this.messages.get(hsi);
|
||||
|
@ -384,6 +404,31 @@ public class MiniHBaseCluster implements HConstants {
|
|||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an exception to send when a region server checks back in
|
||||
* @param serverNumber Which server to send it to
|
||||
* @param ex The exception that will be sent
|
||||
* @throws IOException
|
||||
*/
|
||||
public void addExceptionToSendRegionServer(final int serverNumber,
|
||||
IOException ex) throws IOException {
|
||||
MiniHBaseClusterRegionServer hrs =
|
||||
(MiniHBaseClusterRegionServer)getRegionServer(serverNumber);
|
||||
addExceptionToSendRegionServer(hrs, ex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an exception to send when a region server checks back in
|
||||
* @param hrs Which server to send it to
|
||||
* @param ex The exception that will be sent
|
||||
* @throws IOException
|
||||
*/
|
||||
public void addExceptionToSendRegionServer(
|
||||
final MiniHBaseClusterRegionServer hrs, IOException ex)
|
||||
throws IOException {
|
||||
((MiniHBaseClusterMaster)getMaster()).addException(hrs.getHServerInfo(),ex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a message to include in the responses sent to a regionserver when it
|
||||
* checks back in.
|
||||
|
|
|
@ -29,17 +29,21 @@ import org.apache.hadoop.hbase.HServerAddress;
|
|||
import org.apache.hadoop.hbase.HServerInfo;
|
||||
import org.apache.hadoop.hbase.MiniHBaseCluster;
|
||||
import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
|
||||
import org.apache.hadoop.hbase.YouAreDeadException;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestMasterWrongRS {
|
||||
public class TestKillingServersFromMaster {
|
||||
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||
private static MiniHBaseCluster cluster;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeAllTests() throws Exception {
|
||||
TEST_UTIL.startMiniCluster(3);
|
||||
TEST_UTIL.startMiniCluster(2);
|
||||
cluster = TEST_UTIL.getHBaseCluster();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
|
@ -47,26 +51,53 @@ public class TestMasterWrongRS {
|
|||
TEST_UTIL.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setup() throws IOException {
|
||||
TEST_UTIL.ensureSomeRegionServersAvailable(2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test when region servers start reporting with the wrong address
|
||||
* or start code. Currently the decision is to shut them down.
|
||||
* Test that a region server that reports with the wrong start code
|
||||
* gets shut down
|
||||
* See HBASE-2613
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test (timeout=180000)
|
||||
public void testRsReportsWrongServerName() throws Exception {
|
||||
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
|
||||
public void testRsReportsWrongStartCode() throws Exception {
|
||||
MiniHBaseClusterRegionServer firstServer =
|
||||
(MiniHBaseClusterRegionServer)cluster.getRegionServer(0);
|
||||
HRegionServer secondServer = cluster.getRegionServer(1);
|
||||
HServerInfo hsi = firstServer.getServerInfo();
|
||||
// This constructor creates a new startcode
|
||||
firstServer.setHServerInfo(new HServerInfo(hsi.getServerAddress(),
|
||||
hsi.getInfoPort(), hsi.getHostname()));
|
||||
|
||||
cluster.waitOnRegionServer(0);
|
||||
assertEquals(2, cluster.getLiveRegionServerThreads().size());
|
||||
assertEquals(1, cluster.getLiveRegionServerThreads().size());
|
||||
}
|
||||
|
||||
secondServer.getHServerInfo().setServerAddress(new HServerAddress("0.0.0.0", 60010));
|
||||
/**
|
||||
* Test that a region server that reports with the wrong address
|
||||
* gets shut down
|
||||
* See HBASE-2613
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test (timeout=180000)
|
||||
public void testRsReportsWrongAddress() throws Exception {
|
||||
MiniHBaseClusterRegionServer firstServer =
|
||||
(MiniHBaseClusterRegionServer)cluster.getRegionServer(0);
|
||||
firstServer.getHServerInfo().setServerAddress(
|
||||
new HServerAddress("0.0.0.0", 60010));
|
||||
cluster.waitOnRegionServer(0);
|
||||
assertEquals(1, cluster.getLiveRegionServerThreads().size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a YouAreDeadException to the region server and expect it to shut down
|
||||
* See HBASE-2691
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test (timeout=180000)
|
||||
public void testSendYouAreDead() throws Exception {
|
||||
cluster.addExceptionToSendRegionServer(0, new YouAreDeadException("bam!"));
|
||||
cluster.waitOnRegionServer(0);
|
||||
assertEquals(1, cluster.getLiveRegionServerThreads().size());
|
||||
}
|
|
@ -93,12 +93,7 @@ public class TestMasterTransitions {
|
|||
}
|
||||
|
||||
@Before public void setup() throws IOException {
|
||||
if (TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size() < 2) {
|
||||
// Need at least two servers.
|
||||
LOG.info("Started new server=" +
|
||||
TEST_UTIL.getHBaseCluster().startRegionServer());
|
||||
|
||||
}
|
||||
TEST_UTIL.ensureSomeRegionServersAvailable(2);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue