HBASE-4288 "Server not running" exception during meta verification causes RS abort

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1200648 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-11-10 23:58:06 +00:00
parent 04afdf9633
commit 274e7c2743
3 changed files with 161 additions and 76 deletions

View File

@ -773,6 +773,7 @@ Release 0.92.0 - Unreleased
(Jonathan Hsieh) (Jonathan Hsieh)
HBASE-4714 Don't ship w/ icms enabled by default HBASE-4714 Don't ship w/ icms enabled by default
HBASE-4747 Upgrade maven surefire plugin to 2.10 HBASE-4747 Upgrade maven surefire plugin to 2.10
HBASE-4288 "Server not running" exception during meta verification causes RS abort
NEW FEATURES NEW FEATURES
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via

View File

@ -306,7 +306,7 @@ public class CatalogTracker {
* @throws InterruptedException * @throws InterruptedException
* @throws NotAllMetaRegionsOnlineException if timed out waiting * @throws NotAllMetaRegionsOnlineException if timed out waiting
* @throws IOException * @throws IOException
* @deprecated Use {@link #getRootServerConnection(long)} * @deprecated Use #getRootServerConnection(long)
*/ */
public HRegionInterface waitForRootServerConnection(long timeout) public HRegionInterface waitForRootServerConnection(long timeout)
throws InterruptedException, NotAllMetaRegionsOnlineException, IOException { throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
@ -383,18 +383,17 @@ public class CatalogTracker {
// which we have to wait on. // which we have to wait on.
ServerName newLocation = ServerName newLocation =
MetaReader.readRegionLocation(this, META_REGION_NAME); MetaReader.readRegionLocation(this, META_REGION_NAME);
if (newLocation == null) { if (newLocation == null) return null;
LOG.debug(".META. server unavailable.");
return null;
}
HRegionInterface newConnection = getCachedConnection(newLocation); HRegionInterface newConnection = getCachedConnection(newLocation);
if (verifyRegionLocation(newConnection, newLocation, META_REGION_NAME)) { if (verifyRegionLocation(newConnection, newLocation, META_REGION_NAME)) {
setMetaLocation(newLocation); setMetaLocation(newLocation);
return newConnection; return newConnection;
} else { } else {
LOG.debug("new .META. server: " + newLocation + " isn't valid." + if (LOG.isTraceEnabled()) {
" Cached .META. server: " + this.metaLocation); LOG.trace("New .META. server: " + newLocation + " isn't valid." +
" Cached .META. server: " + this.metaLocation);
}
} }
return null; return null;
} }
@ -412,7 +411,7 @@ public class CatalogTracker {
try { try {
if (waitForMeta(100) != null) break; if (waitForMeta(100) != null) break;
} catch (NotAllMetaRegionsOnlineException e) { } catch (NotAllMetaRegionsOnlineException e) {
LOG.info("Retrying", e); if (LOG.isTraceEnabled()) LOG.trace("Retrying", e);
} catch (IOException e) { } catch (IOException e) {
LOG.info("Retrying", e); LOG.info("Retrying", e);
} }
@ -436,7 +435,7 @@ public class CatalogTracker {
public ServerName waitForMeta(long timeout) public ServerName waitForMeta(long timeout)
throws InterruptedException, IOException, NotAllMetaRegionsOnlineException { throws InterruptedException, IOException, NotAllMetaRegionsOnlineException {
long stop = System.currentTimeMillis() + timeout; long stop = System.currentTimeMillis() + timeout;
long waitTime = Math.min(500, timeout); long waitTime = Math.min(50, timeout);
synchronized (metaAvailable) { synchronized (metaAvailable) {
while(!stopped && (timeout == 0 || System.currentTimeMillis() < stop)) { while(!stopped && (timeout == 0 || System.currentTimeMillis() < stop)) {
if (getMetaServerConnection() != null) { if (getMetaServerConnection() != null) {
@ -499,15 +498,16 @@ public class CatalogTracker {
} }
/** /**
* Caller must be synchronized on this.metaAvailable
* @param metaLocation * @param metaLocation
*/ */
private void setMetaLocation(final ServerName metaLocation) { void setMetaLocation(final ServerName metaLocation) {
LOG.debug("set new cached META location: " + metaLocation); LOG.debug("Set new cached META location: " + metaLocation);
metaAvailable.set(true); synchronized (this.metaAvailable) {
this.metaLocation = metaLocation; this.metaLocation = metaLocation;
// no synchronization because these are private and already under lock this.metaAvailable.set(true);
this.metaAvailable.notifyAll(); // no synchronization because these are private and already under lock
this.metaAvailable.notifyAll();
}
} }
/** /**

View File

@ -95,11 +95,6 @@ public class TestCatalogTracker {
this.watcher.close(); this.watcher.close();
} }
private CatalogTracker constructAndStartCatalogTracker()
throws IOException, InterruptedException {
return constructAndStartCatalogTracker(null);
}
private CatalogTracker constructAndStartCatalogTracker(final HConnection c) private CatalogTracker constructAndStartCatalogTracker(final HConnection c)
throws IOException, InterruptedException { throws IOException, InterruptedException {
CatalogTracker ct = new CatalogTracker(this.watcher, null, c, CatalogTracker ct = new CatalogTracker(this.watcher, null, c,
@ -135,57 +130,103 @@ public class TestCatalogTracker {
*/ */
@Test public void testInterruptWaitOnMetaAndRoot() @Test public void testInterruptWaitOnMetaAndRoot()
throws IOException, InterruptedException { throws IOException, InterruptedException {
final CatalogTracker ct = constructAndStartCatalogTracker(); HConnection connection = mockConnection(null);
ServerName hsa = ct.getRootLocation(); try {
Assert.assertNull(hsa); final CatalogTracker ct = constructAndStartCatalogTracker(connection);
ServerName meta = ct.getMetaLocation(); ServerName hsa = ct.getRootLocation();
Assert.assertNull(meta); Assert.assertNull(hsa);
Thread t = new Thread() { ServerName meta = ct.getMetaLocation();
@Override Assert.assertNull(meta);
public void run() { Thread t = new Thread() {
try { @Override
ct.waitForMeta(); public void run() {
} catch (InterruptedException e) { try {
throw new RuntimeException("Interrupted", e); ct.waitForMeta();
} catch (InterruptedException e) {
throw new RuntimeException("Interrupted", e);
}
} }
} };
}; t.start();
t.start(); while (!t.isAlive())
while (!t.isAlive()) Threads.sleep(1); Threads.sleep(1);
Threads.sleep(1); Threads.sleep(1);
assertTrue(t.isAlive()); assertTrue(t.isAlive());
ct.stop(); ct.stop();
// Join the thread... should exit shortly. // Join the thread... should exit shortly.
t.join(); t.join();
} finally {
HConnectionManager.deleteConnection(UTIL.getConfiguration(), true);
}
} }
/**
* Test for HBASE-4288. Throw an IOE when trying to verify meta region and
* prove it doesn't cause master shutdown.
* @see <a href="https://issues.apache.org/jira/browse/HBASE-4288">HBASE-4288</a>
* @throws IOException
* @throws InterruptedException
* @throws KeeperException
*/
@Test @Test
public void testGetMetaServerConnectionFails() public void testServerNotRunningIOException()
throws IOException, InterruptedException, KeeperException { throws IOException, InterruptedException, KeeperException {
HConnection connection = // Mock an HRegionInterface.
HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration()); final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
HConnection connection = mockConnection(implementation);
try { try {
// Mock an HRegionInterface. // If a 'getRegionInfo' is called on mocked HRegionInterface, throw IOE
final HRegionInterface implementation = Mockito.mock(HRegionInterface.class); // the first time. 'Succeed' the second time we are called.
// If a 'get' is called on mocked interface, throw connection refused. Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())). thenThrow(new IOException("Server not running, aborting")).
thenThrow(new ConnectException("Connection refused")); thenReturn(new HRegionInfo());
// Make it so our implementation is returned when we do a connection. // After we encounter the above 'Server not running', we should catch the
// Need to fake out the location lookup stuff first. // IOE and go into retrying for the meta mode. We'll do gets on -ROOT- to
ServerName sn = new ServerName("example.com", 1234, System.currentTimeMillis()); // get new meta location. Return something so this 'get' succeeds
final HRegionLocation anyLocation = // (here we mock up getRegionServerWithRetries, the wrapper around
new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, sn.getHostname(), // the actual get).
sn.getPort()); Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>)Mockito.any())).
Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(), thenReturn(getMetaTableRowResult());
(byte[]) Mockito.any(), Mockito.anyBoolean())).
thenReturn(anyLocation);
Mockito.when(connection.getHRegionConnection(Mockito.anyString(),
Mockito.anyInt(), Matchers.anyBoolean())).
thenReturn(implementation);
// Now start up the catalogtracker with our doctored Connection. // Now start up the catalogtracker with our doctored Connection.
final CatalogTracker ct = constructAndStartCatalogTracker(connection); final CatalogTracker ct = constructAndStartCatalogTracker(connection);
try { try {
RootLocationEditor.setRootLocation(this.watcher, sn); // Set a location for root and meta.
RootLocationEditor.setRootLocation(this.watcher, SN);
ct.setMetaLocation(SN);
// Call the method that HBASE-4288 calls.
Assert.assertFalse(ct.waitForMetaServerConnectionDefault() == null);
} finally {
// Clean out root and meta locations or later tests will be confused...
// they presume start fresh in zk.
ct.resetMetaLocation();
RootLocationEditor.deleteRootLocation(this.watcher);
}
} finally {
// Clear out our doctored connection or could mess up subsequent tests.
HConnectionManager.deleteConnection(UTIL.getConfiguration(), true);
}
}
/**
* Test we survive a connection refused {@link ConnectException}
* @throws IOException
* @throws InterruptedException
* @throws KeeperException
*/
@Test
public void testGetMetaServerConnectionFails()
throws IOException, InterruptedException, KeeperException {
// Mock an HRegionInterface.
final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
HConnection connection = mockConnection(implementation);
try {
// If a 'get' is called on mocked interface, throw connection refused.
Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())).
thenThrow(new ConnectException("Connection refused"));
// Now start up the catalogtracker with our doctored Connection.
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
try {
RootLocationEditor.setRootLocation(this.watcher, SN);
long timeout = UTIL.getConfiguration(). long timeout = UTIL.getConfiguration().
getLong("hbase.catalog.verification.timeout", 1000); getLong("hbase.catalog.verification.timeout", 1000);
Assert.assertFalse(ct.verifyMetaRegionLocation(timeout)); Assert.assertFalse(ct.verifyMetaRegionLocation(timeout));
@ -234,7 +275,8 @@ public class TestCatalogTracker {
@Test (expected = NotAllMetaRegionsOnlineException.class) @Test (expected = NotAllMetaRegionsOnlineException.class)
public void testTimeoutWaitForRoot() public void testTimeoutWaitForRoot()
throws IOException, InterruptedException { throws IOException, InterruptedException {
final CatalogTracker ct = constructAndStartCatalogTracker(); HConnection connection = Mockito.mock(HConnection.class);
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
ct.waitForRoot(100); ct.waitForRoot(100);
} }
@ -259,7 +301,8 @@ public class TestCatalogTracker {
*/ */
@Test public void testNoTimeoutWaitForRoot() @Test public void testNoTimeoutWaitForRoot()
throws IOException, InterruptedException, KeeperException { throws IOException, InterruptedException, KeeperException {
final CatalogTracker ct = constructAndStartCatalogTracker(); HConnection connection = Mockito.mock(HConnection.class);
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
ServerName hsa = ct.getRootLocation(); ServerName hsa = ct.getRootLocation();
Assert.assertNull(hsa); Assert.assertNull(hsa);
@ -328,17 +371,7 @@ public class TestCatalogTracker {
// like things have been getting assigned. Make it so we'll return a // like things have been getting assigned. Make it so we'll return a
// location (no matter what the Get is). Same for getHRegionInfo -- always // location (no matter what the Get is). Same for getHRegionInfo -- always
// just return the meta region. // just return the meta region.
List<KeyValue> kvs = new ArrayList<KeyValue>(); final Result result = getMetaTableRowResult();
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO)));
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
Bytes.toBytes(SN.getHostAndPort())));
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
Bytes.toBytes(SN.getStartcode())));
final Result result = new Result(kvs);
Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>)Mockito.any())). Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>)Mockito.any())).
thenReturn(result); thenReturn(result);
Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())). Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
@ -361,6 +394,57 @@ public class TestCatalogTracker {
} }
} }
/**
* @param implementation An {@link HRegionInterface} instance; you'll likely
* want to pass a mocked HRS; can be null.
* @return Mock up a connection that returns a {@link Configuration} when
* {@link HConnection#getConfiguration()} is called, a 'location' when
* {@link HConnection#getRegionLocation(byte[], byte[], boolean)} is called,
* and that returns the passed {@link HRegionInterface} instance when
* {@link HConnection#getHRegionConnection(String, int)}
* is called (Be sure to call
* {@link HConnectionManager#deleteConnection(org.apache.hadoop.conf.Configuration, boolean)}
* when done with this mocked Connection.)
* @throws IOException
*/
private HConnection mockConnection(final HRegionInterface implementation)
throws IOException {
HConnection connection =
HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration());
// Make it so we return any old location when asked.
final HRegionLocation anyLocation =
new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, SN.getHostname(),
SN.getPort());
Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(),
(byte[]) Mockito.any(), Mockito.anyBoolean())).
thenReturn(anyLocation);
if (implementation != null) {
// If a call to getHRegionConnection, return this implementation.
Mockito.when(connection.getHRegionConnection(Mockito.anyString(), Mockito.anyInt())).
thenReturn(implementation);
}
return connection;
}
/**
* @return A mocked up Result that fakes a Get on a row in the
* <code>.META.</code> table.
* @throws IOException
*/
private Result getMetaTableRowResult() throws IOException {
List<KeyValue> kvs = new ArrayList<KeyValue>();
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO)));
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
Bytes.toBytes(SN.getHostAndPort())));
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
Bytes.toBytes(SN.getStartcode())));
return new Result(kvs);
}
private void startWaitAliveThenWaitItLives(final Thread t, final int ms) { private void startWaitAliveThenWaitItLives(final Thread t, final int ms) {
t.start(); t.start();
while(!t.isAlive()) { while(!t.isAlive()) {