HBASE-2691 LeaseStillHeldException totally ignored by RS, wrongly named

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@952869 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jean-Daniel Cryans 2010-06-09 00:10:29 +00:00
parent dc641719fa
commit d3fb45f189
10 changed files with 196 additions and 28 deletions

View File

@ -379,6 +379,7 @@ Release 0.21.0 - Unreleased
HBASE-2676 TestInfoServers should use ephemeral ports
HBASE-2616 TestHRegion.testWritesWhileGetting flaky on trunk
HBASE-2684 TestMasterWrongRS flaky in trunk
HBASE-2691 LeaseStillHeldException totally ignored by RS, wrongly named
IMPROVEMENTS
HBASE-1760 Cleanup TODOs in HTable

View File

@ -0,0 +1,34 @@
/**
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.io.IOException;
/**
 * Signals that the master cannot accept a region server yet and that the
 * server should hold on: the region server was shut down and restarted so
 * quickly that the master has not finished processing the shutdown of the
 * previous instance.
 */
public class PleaseHoldException extends IOException {

  /**
   * Creates the exception with a description of why the server must wait.
   * @param message detail message handed to {@link IOException}
   */
  public PleaseHoldException(final String message) {
    super(message);
  }
}

View File

@ -0,0 +1,34 @@
/**
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.io.IOException;
/**
 * Signals that the master is already processing the reporting region server
 * as dead. This can happen when a region server has lost its session but
 * has not noticed it yet.
 */
public class YouAreDeadException extends IOException {

  /**
   * Creates the exception with a description of why the server was rejected.
   * @param message detail message handed to {@link IOException}
   */
  public YouAreDeadException(final String message) {
    super(message);
  }
}

View File

@ -679,11 +679,13 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
/**
* Override if you'd add messages to return to regionserver <code>hsi</code>
* @param messages Messages to add to
* or to send an exception.
* @param msgs Messages to add to
* @return Messages to return to
* @throws IOException exceptions that were injected for the region servers
*/
protected HMsg [] adornRegionServerAnswer(final HServerInfo hsi,
final HMsg [] msgs) {
final HMsg [] msgs) throws IOException {
return msgs;
}

View File

@ -30,8 +30,8 @@ import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.HServerLoad;
import org.apache.hadoop.hbase.Leases;
import org.apache.hadoop.hbase.Leases.LeaseStillHeldException;
import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.YouAreDeadException;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
@ -177,13 +177,14 @@ public class ServerManager implements HConstants {
String hostAndPort = info.getServerAddress().toString();
HServerInfo existingServer = haveServerWithSameHostAndPortAlready(info.getHostnamePort());
if (existingServer != null) {
LOG.info("Server start rejected; we already have " + hostAndPort +
" registered; existingServer=" + existingServer + ", newServer=" + info);
String message = "Server start rejected; we already have " + hostAndPort +
" registered; existingServer=" + existingServer + ", newServer=" + info;
LOG.info(message);
if (existingServer.getStartCode() < info.getStartCode()) {
LOG.info("Triggering server recovery; existingServer looks stale");
expireServer(existingServer);
}
throw new Leases.LeaseStillHeldException(hostAndPort);
throw new PleaseHoldException(message);
}
checkIsDead(info.getServerName(), "STARTUP");
LOG.info("Received start message from: " + info.getServerName());
@ -208,11 +209,12 @@ public class ServerManager implements HConstants {
* @throws YouAreDeadException if the server is currently being processed as dead
*/
private void checkIsDead(final String serverName, final String what)
throws LeaseStillHeldException {
throws YouAreDeadException {
if (!isDead(serverName)) return;
LOG.debug("Server " + what + " rejected; currently processing " +
serverName + " as dead server");
throw new Leases.LeaseStillHeldException(serverName);
String message = "Server " + what + " rejected; currently processing " +
serverName + " as dead server";
LOG.debug(message);
throw new YouAreDeadException(message);
}
/**

View File

@ -41,9 +41,11 @@ import org.apache.hadoop.hbase.Leases;
import org.apache.hadoop.hbase.Leases.LeaseStillHeldException;
import org.apache.hadoop.hbase.LocalHBaseCluster;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.UnknownRowLockException;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.YouAreDeadException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.MultiPut;
@ -524,9 +526,15 @@ public class HRegionServer implements HConstants, HRegionInterface,
continue;
}
} catch (Exception e) { // FindBugs REC_CATCH_EXCEPTION
// Two special exceptions could be printed out here,
// PleaseHoldException and YouAreDeadException
if (e instanceof IOException) {
e = RemoteExceptionHandler.checkIOException((IOException) e);
}
if (e instanceof YouAreDeadException) {
// This will be caught and handled as a fatal error below
throw e;
}
tries++;
if (tries > 0 && (tries % this.numRetries) == 0) {
// Check filesystem every so often.

View File

@ -811,4 +811,20 @@ public class HBaseTestingUtility {
Thread.sleep(500);
}
}
/**
 * Make sure that at least the specified number of region servers
 * are running, starting one new region server for every server the cluster
 * is short of <code>num</code>. Does nothing when enough servers are
 * already live.
 * @param num minimum number of region servers that should be running
 * @throws IOException if thrown while querying the cluster or starting a
 * region server
 */
public void ensureSomeRegionServersAvailable(final int num)
    throws IOException {
  // Measure the shortfall once instead of re-polling the live list after
  // each start: whether a just-started server is immediately reported as
  // live is not visible from here, and re-polling could over-start servers.
  final int liveServers =
    this.getHBaseCluster().getLiveRegionServerThreads().size();
  for (int running = liveServers; running < num; running++) {
    // Need at least "num" servers.
    LOG.info("Started new server=" +
      this.getHBaseCluster().startRegionServer());
  }
}
}

View File

@ -78,6 +78,9 @@ public class MiniHBaseCluster implements HConstants {
private final Map<HServerInfo, List<HMsg>> messages =
new ConcurrentHashMap<HServerInfo, List<HMsg>>();
private final Map<HServerInfo, IOException> exceptions =
new ConcurrentHashMap<HServerInfo, IOException>();
public MiniHBaseClusterMaster(final Configuration conf)
throws IOException {
super(conf);
@ -99,9 +102,26 @@ public class MiniHBaseCluster implements HConstants {
}
}
/**
 * Registers an exception to hand back to the given region server instead of
 * a normal answer. Any exception previously registered for the same server
 * is replaced.
 * @param hsi the region server the exception is meant for
 * @param ex the exception to throw back
 */
void addException(final HServerInfo hsi, final IOException ex) {
  exceptions.put(hsi, ex);
}
/**
* This implementation is special, exceptions will be treated first and
* message won't be sent back to the region servers even if some are
* specified.
* @param hsi the rs
* @param msgs Messages to add to
* @return
* @throws IOException thrown if one was added for this region server
*/
@Override
protected HMsg[] adornRegionServerAnswer(final HServerInfo hsi,
final HMsg[] msgs) {
final HMsg[] msgs) throws IOException {
IOException ex = this.exceptions.remove(hsi);
if (ex != null) {
throw ex;
}
HMsg [] answerMsgs = msgs;
synchronized (this.messages) {
List<HMsg> hmsgs = this.messages.get(hsi);
@ -384,6 +404,31 @@ public class MiniHBaseCluster implements HConstants {
return index;
}
/**
 * Queue an exception for the region server at the given index; it is sent
 * the next time that region server checks back in with the master.
 * @param serverNumber index of the region server to send it to
 * @param ex The exception that will be sent
 * @throws IOException
 */
public void addExceptionToSendRegionServer(final int serverNumber,
    IOException ex) throws IOException {
  // The cast selects the MiniHBaseClusterRegionServer overload.
  addExceptionToSendRegionServer(
    (MiniHBaseClusterRegionServer)getRegionServer(serverNumber), ex);
}
/**
 * Queue an exception for the given region server by registering it with
 * the mini cluster's master, keyed by the server's HServerInfo; it is sent
 * when that region server checks back in.
 * @param hrs Which server to send it to
 * @param ex The exception that will be sent
 * @throws IOException
 */
public void addExceptionToSendRegionServer(
    final MiniHBaseClusterRegionServer hrs, IOException ex)
    throws IOException {
  final MiniHBaseClusterMaster master = (MiniHBaseClusterMaster)getMaster();
  master.addException(hrs.getHServerInfo(), ex);
}
/**
* Add a message to include in the responses send a regionserver when it
* checks back in.

View File

@ -29,17 +29,21 @@ import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
import org.apache.hadoop.hbase.YouAreDeadException;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestMasterWrongRS {
public class TestKillingServersFromMaster {
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static MiniHBaseCluster cluster;
@BeforeClass
public static void beforeAllTests() throws Exception {
TEST_UTIL.startMiniCluster(3);
TEST_UTIL.startMiniCluster(2);
cluster = TEST_UTIL.getHBaseCluster();
}
@AfterClass
@ -47,26 +51,53 @@ public class TestMasterWrongRS {
TEST_UTIL.shutdownMiniCluster();
}
/**
 * Runs before every test: asks the testing utility to make sure enough
 * region servers are available, since each test in this class expects one
 * of them to go down.
 * @throws IOException
 */
@Before
public void setup() throws IOException {
  final int requiredServers = 2;
  TEST_UTIL.ensureSomeRegionServersAvailable(requiredServers);
}
/**
* Test when region servers start reporting with the wrong address
* or start code. Currently the decision is to shut them down.
* Test that a region server that reports with the wrong start code
* gets shut down
* See HBASE-2613
* @throws Exception
*/
@Test (timeout=180000)
public void testRsReportsWrongServerName() throws Exception {
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
public void testRsReportsWrongStartCode() throws Exception {
MiniHBaseClusterRegionServer firstServer =
(MiniHBaseClusterRegionServer)cluster.getRegionServer(0);
HRegionServer secondServer = cluster.getRegionServer(1);
HServerInfo hsi = firstServer.getServerInfo();
// This constructor creates a new startcode
firstServer.setHServerInfo(new HServerInfo(hsi.getServerAddress(),
hsi.getInfoPort(), hsi.getHostname()));
cluster.waitOnRegionServer(0);
assertEquals(2, cluster.getLiveRegionServerThreads().size());
assertEquals(1, cluster.getLiveRegionServerThreads().size());
}
secondServer.getHServerInfo().setServerAddress(new HServerAddress("0.0.0.0", 60010));
/**
 * Overwrite the address in the first region server's HServerInfo with a
 * bogus one, wait for that server to stop, and check that exactly one
 * region server is left alive.
 * See HBASE-2613
 * @throws Exception
 */
@Test (timeout=180000)
public void testRsReportsWrongAddress() throws Exception {
  final MiniHBaseClusterRegionServer victim =
    (MiniHBaseClusterRegionServer)cluster.getRegionServer(0);
  final HServerInfo victimInfo = victim.getHServerInfo();
  victimInfo.setServerAddress(new HServerAddress("0.0.0.0", 60010));
  cluster.waitOnRegionServer(0);
  final int liveServers = cluster.getLiveRegionServerThreads().size();
  assertEquals(1, liveServers);
}
/**
 * Queue a YouAreDeadException for the first region server, wait for that
 * server to stop, and check that exactly one region server is left alive.
 * See HBASE-2691
 * @throws Exception
 */
@Test (timeout=180000)
public void testSendYouAreDead() throws Exception {
  final YouAreDeadException deathNotice = new YouAreDeadException("bam!");
  cluster.addExceptionToSendRegionServer(0, deathNotice);
  cluster.waitOnRegionServer(0);
  final int liveServers = cluster.getLiveRegionServerThreads().size();
  assertEquals(1, liveServers);
}

View File

@ -93,12 +93,7 @@ public class TestMasterTransitions {
}
@Before public void setup() throws IOException {
if (TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size() < 2) {
// Need at least two servers.
LOG.info("Started new server=" +
TEST_UTIL.getHBaseCluster().startRegionServer());
}
TEST_UTIL.ensureSomeRegionServersAvailable(2);
}
/**