HBASE-4288 "Server not running" exception during meta verification causes RS abort

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1200648 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-11-10 23:58:06 +00:00
parent 04afdf9633
commit 274e7c2743
3 changed files with 161 additions and 76 deletions

View File

@ -773,6 +773,7 @@ Release 0.92.0 - Unreleased
(Jonathan Hsieh)
HBASE-4714 Don't ship w/ icms enabled by default
HBASE-4747 Upgrade maven surefire plugin to 2.10
HBASE-4288 "Server not running" exception during meta verification causes RS abort
NEW FEATURES
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via

View File

@ -306,7 +306,7 @@ public class CatalogTracker {
* @throws InterruptedException
* @throws NotAllMetaRegionsOnlineException if timed out waiting
* @throws IOException
* @deprecated Use {@link #getRootServerConnection(long)}
* @deprecated Use #getRootServerConnection(long)
*/
public HRegionInterface waitForRootServerConnection(long timeout)
throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
@ -383,19 +383,18 @@ public class CatalogTracker {
// which we have to wait on.
ServerName newLocation =
MetaReader.readRegionLocation(this, META_REGION_NAME);
if (newLocation == null) {
LOG.debug(".META. server unavailable.");
return null;
}
if (newLocation == null) return null;
HRegionInterface newConnection = getCachedConnection(newLocation);
if (verifyRegionLocation(newConnection, newLocation, META_REGION_NAME)) {
setMetaLocation(newLocation);
return newConnection;
} else {
LOG.debug("new .META. server: " + newLocation + " isn't valid." +
if (LOG.isTraceEnabled()) {
LOG.trace("New .META. server: " + newLocation + " isn't valid." +
" Cached .META. server: " + this.metaLocation);
}
}
return null;
}
}
@ -412,7 +411,7 @@ public class CatalogTracker {
try {
if (waitForMeta(100) != null) break;
} catch (NotAllMetaRegionsOnlineException e) {
LOG.info("Retrying", e);
if (LOG.isTraceEnabled()) LOG.trace("Retrying", e);
} catch (IOException e) {
LOG.info("Retrying", e);
}
@ -436,7 +435,7 @@ public class CatalogTracker {
public ServerName waitForMeta(long timeout)
throws InterruptedException, IOException, NotAllMetaRegionsOnlineException {
long stop = System.currentTimeMillis() + timeout;
long waitTime = Math.min(500, timeout);
long waitTime = Math.min(50, timeout);
synchronized (metaAvailable) {
while(!stopped && (timeout == 0 || System.currentTimeMillis() < stop)) {
if (getMetaServerConnection() != null) {
@ -499,16 +498,17 @@ public class CatalogTracker {
}
/**
* Caller must be synchronized on this.metaAvailable
* @param metaLocation
*/
private void setMetaLocation(final ServerName metaLocation) {
LOG.debug("set new cached META location: " + metaLocation);
metaAvailable.set(true);
void setMetaLocation(final ServerName metaLocation) {
LOG.debug("Set new cached META location: " + metaLocation);
synchronized (this.metaAvailable) {
this.metaLocation = metaLocation;
this.metaAvailable.set(true);
// no synchronization because these are private and already under lock
this.metaAvailable.notifyAll();
}
}
/**
* @param sn ServerName to get a connection against.

View File

@ -95,11 +95,6 @@ public class TestCatalogTracker {
this.watcher.close();
}
private CatalogTracker constructAndStartCatalogTracker()
throws IOException, InterruptedException {
return constructAndStartCatalogTracker(null);
}
private CatalogTracker constructAndStartCatalogTracker(final HConnection c)
throws IOException, InterruptedException {
CatalogTracker ct = new CatalogTracker(this.watcher, null, c,
@ -135,7 +130,9 @@ public class TestCatalogTracker {
*/
@Test public void testInterruptWaitOnMetaAndRoot()
throws IOException, InterruptedException {
final CatalogTracker ct = constructAndStartCatalogTracker();
HConnection connection = mockConnection(null);
try {
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
ServerName hsa = ct.getRootLocation();
Assert.assertNull(hsa);
ServerName meta = ct.getMetaLocation();
@ -151,41 +148,85 @@ public class TestCatalogTracker {
}
};
t.start();
while (!t.isAlive()) Threads.sleep(1);
while (!t.isAlive())
Threads.sleep(1);
Threads.sleep(1);
assertTrue(t.isAlive());
ct.stop();
// Join the thread... should exit shortly.
t.join();
} finally {
HConnectionManager.deleteConnection(UTIL.getConfiguration(), true);
}
}
/**
* Test for HBASE-4288. Throw an IOE when trying to verify meta region and
* prove it doesn't cause master shutdown.
* @see <a href="https://issues.apache.org/jira/browse/HBASE-4288">HBASE-4288</a>
* @throws IOException
* @throws InterruptedException
* @throws KeeperException
*/
@Test
public void testGetMetaServerConnectionFails()
public void testServerNotRunningIOException()
throws IOException, InterruptedException, KeeperException {
HConnection connection =
HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration());
try {
// Mock an HRegionInterface.
final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
// If a 'get' is called on mocked interface, throw connection refused.
Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())).
thenThrow(new ConnectException("Connection refused"));
// Make it so our implementation is returned when we do a connection.
// Need to fake out the location lookup stuff first.
ServerName sn = new ServerName("example.com", 1234, System.currentTimeMillis());
final HRegionLocation anyLocation =
new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, sn.getHostname(),
sn.getPort());
Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(),
(byte[]) Mockito.any(), Mockito.anyBoolean())).
thenReturn(anyLocation);
Mockito.when(connection.getHRegionConnection(Mockito.anyString(),
Mockito.anyInt(), Matchers.anyBoolean())).
thenReturn(implementation);
HConnection connection = mockConnection(implementation);
try {
// If a 'getRegionInfo' is called on mocked HRegionInterface, throw IOE
// the first time. 'Succeed' the second time we are called.
Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
thenThrow(new IOException("Server not running, aborting")).
thenReturn(new HRegionInfo());
// After we encounter the above 'Server not running', we should catch the
// IOE and go into retrying for the meta mode. We'll do gets on -ROOT- to
// get new meta location. Return something so this 'get' succeeds
// (here we mock up getRegionServerWithRetries, the wrapper around
// the actual get).
Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>)Mockito.any())).
thenReturn(getMetaTableRowResult());
// Now start up the catalogtracker with our doctored Connection.
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
try {
RootLocationEditor.setRootLocation(this.watcher, sn);
// Set a location for root and meta.
RootLocationEditor.setRootLocation(this.watcher, SN);
ct.setMetaLocation(SN);
// Call the method that HBASE-4288 calls.
Assert.assertFalse(ct.waitForMetaServerConnectionDefault() == null);
} finally {
// Clean out root and meta locations or later tests will be confused...
// they presume start fresh in zk.
ct.resetMetaLocation();
RootLocationEditor.deleteRootLocation(this.watcher);
}
} finally {
// Clear out our doctored connection or could mess up subsequent tests.
HConnectionManager.deleteConnection(UTIL.getConfiguration(), true);
}
}
/**
* Test we survive a connection refused {@link ConnectException}
* @throws IOException
* @throws InterruptedException
* @throws KeeperException
*/
@Test
public void testGetMetaServerConnectionFails()
throws IOException, InterruptedException, KeeperException {
// Mock an HRegionInterface.
final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
HConnection connection = mockConnection(implementation);
try {
// If a 'get' is called on mocked interface, throw connection refused.
Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())).
thenThrow(new ConnectException("Connection refused"));
// Now start up the catalogtracker with our doctored Connection.
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
try {
RootLocationEditor.setRootLocation(this.watcher, SN);
long timeout = UTIL.getConfiguration().
getLong("hbase.catalog.verification.timeout", 1000);
Assert.assertFalse(ct.verifyMetaRegionLocation(timeout));
@ -234,7 +275,8 @@ public class TestCatalogTracker {
@Test (expected = NotAllMetaRegionsOnlineException.class)
public void testTimeoutWaitForRoot()
throws IOException, InterruptedException {
final CatalogTracker ct = constructAndStartCatalogTracker();
HConnection connection = Mockito.mock(HConnection.class);
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
ct.waitForRoot(100);
}
@ -259,7 +301,8 @@ public class TestCatalogTracker {
*/
@Test public void testNoTimeoutWaitForRoot()
throws IOException, InterruptedException, KeeperException {
final CatalogTracker ct = constructAndStartCatalogTracker();
HConnection connection = Mockito.mock(HConnection.class);
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
ServerName hsa = ct.getRootLocation();
Assert.assertNull(hsa);
@ -328,17 +371,7 @@ public class TestCatalogTracker {
// like things have been getting assigned. Make it so we'll return a
// location (no matter what the Get is). Same for getHRegionInfo -- always
// just return the meta region.
List<KeyValue> kvs = new ArrayList<KeyValue>();
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO)));
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
Bytes.toBytes(SN.getHostAndPort())));
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
Bytes.toBytes(SN.getStartcode())));
final Result result = new Result(kvs);
final Result result = getMetaTableRowResult();
Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>)Mockito.any())).
thenReturn(result);
Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
@ -361,6 +394,57 @@ public class TestCatalogTracker {
}
}
/**
* @param implementation An {@link HRegionInterface} instance; you'll likely
* want to pass a mocked HRS; can be null.
* @return Mock up a connection that returns a {@link Configuration} when
* {@link HConnection#getConfiguration()} is called, a 'location' when
* {@link HConnection#getRegionLocation(byte[], byte[], boolean)} is called,
* and that returns the passed {@link HRegionInterface} instance when
* {@link HConnection#getHRegionConnection(String, int)}
* is called. Be sure to call
* {@link HConnectionManager#deleteConnection(org.apache.hadoop.conf.Configuration, boolean)}
* when done with this mocked Connection.
* @throws IOException
*/
private HConnection mockConnection(final HRegionInterface implementation)
    throws IOException {
  final HConnection mocked =
      HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration());
  // Every region-location lookup answers with the first meta region at SN's
  // hostname and port, whatever table/row is asked for.
  final HRegionLocation metaOnSn = new HRegionLocation(
      HRegionInfo.FIRST_META_REGIONINFO, SN.getHostname(), SN.getPort());
  Mockito.when(mocked.getRegionLocation((byte[]) Mockito.any(),
      (byte[]) Mockito.any(), Mockito.anyBoolean())).
    thenReturn(metaOnSn);
  if (implementation == null) {
    // No region server stand-in supplied; getHRegionConnection stays unstubbed.
    return mocked;
  }
  // Hand back the caller's stand-in for any host/port connection request.
  Mockito.when(mocked.getHRegionConnection(Mockito.anyString(), Mockito.anyInt())).
    thenReturn(implementation);
  return mocked;
}
/**
* @return A mocked up Result that fakes a Get on a row in the
* <code>.META.</code> table.
* @throws IOException
*/
private Result getMetaTableRowResult() throws IOException {
  // All three cells share the empty row key and the catalog family.
  final byte[] row = HConstants.EMPTY_BYTE_ARRAY;
  final byte[] family = HConstants.CATALOG_FAMILY;
  final List<KeyValue> cells = new ArrayList<KeyValue>();
  // Serialized region info for the first meta region.
  final KeyValue regionInfoCell = new KeyValue(row, family,
      HConstants.REGIONINFO_QUALIFIER,
      Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO));
  cells.add(regionInfoCell);
  // host:port of the server named by SN.
  final KeyValue serverCell = new KeyValue(row, family,
      HConstants.SERVER_QUALIFIER, Bytes.toBytes(SN.getHostAndPort()));
  cells.add(serverCell);
  // Start code of the server named by SN.
  final KeyValue startCodeCell = new KeyValue(row, family,
      HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(SN.getStartcode()));
  cells.add(startCodeCell);
  return new Result(cells);
}
private void startWaitAliveThenWaitItLives(final Thread t, final int ms) {
t.start();
while(!t.isAlive()) {