HBASE-4288 "Server not running" exception during meta verification causes RS abort
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1200648 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
04afdf9633
commit
274e7c2743
|
@ -773,6 +773,7 @@ Release 0.92.0 - Unreleased
|
|||
(Jonathan Hsieh)
|
||||
HBASE-4714 Don't ship w/ icms enabled by default
|
||||
HBASE-4747 Upgrade maven surefire plugin to 2.10
|
||||
HBASE-4288 "Server not running" exception during meta verification causes RS abort
|
||||
|
||||
NEW FEATURES
|
||||
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via
|
||||
|
|
|
@ -306,7 +306,7 @@ public class CatalogTracker {
|
|||
* @throws InterruptedException
|
||||
* @throws NotAllMetaRegionsOnlineException if timed out waiting
|
||||
* @throws IOException
|
||||
* @deprecated Use {@link #getRootServerConnection(long)}
|
||||
* @deprecated Use #getRootServerConnection(long)
|
||||
*/
|
||||
public HRegionInterface waitForRootServerConnection(long timeout)
|
||||
throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
|
||||
|
@ -383,18 +383,17 @@ public class CatalogTracker {
|
|||
// which we have to wait on.
|
||||
ServerName newLocation =
|
||||
MetaReader.readRegionLocation(this, META_REGION_NAME);
|
||||
if (newLocation == null) {
|
||||
LOG.debug(".META. server unavailable.");
|
||||
return null;
|
||||
}
|
||||
if (newLocation == null) return null;
|
||||
|
||||
HRegionInterface newConnection = getCachedConnection(newLocation);
|
||||
if (verifyRegionLocation(newConnection, newLocation, META_REGION_NAME)) {
|
||||
setMetaLocation(newLocation);
|
||||
return newConnection;
|
||||
} else {
|
||||
LOG.debug("new .META. server: " + newLocation + " isn't valid." +
|
||||
" Cached .META. server: " + this.metaLocation);
|
||||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace("New .META. server: " + newLocation + " isn't valid." +
|
||||
" Cached .META. server: " + this.metaLocation);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
@ -412,7 +411,7 @@ public class CatalogTracker {
|
|||
try {
|
||||
if (waitForMeta(100) != null) break;
|
||||
} catch (NotAllMetaRegionsOnlineException e) {
|
||||
LOG.info("Retrying", e);
|
||||
if (LOG.isTraceEnabled()) LOG.trace("Retrying", e);
|
||||
} catch (IOException e) {
|
||||
LOG.info("Retrying", e);
|
||||
}
|
||||
|
@ -436,7 +435,7 @@ public class CatalogTracker {
|
|||
public ServerName waitForMeta(long timeout)
|
||||
throws InterruptedException, IOException, NotAllMetaRegionsOnlineException {
|
||||
long stop = System.currentTimeMillis() + timeout;
|
||||
long waitTime = Math.min(500, timeout);
|
||||
long waitTime = Math.min(50, timeout);
|
||||
synchronized (metaAvailable) {
|
||||
while(!stopped && (timeout == 0 || System.currentTimeMillis() < stop)) {
|
||||
if (getMetaServerConnection() != null) {
|
||||
|
@ -499,15 +498,16 @@ public class CatalogTracker {
|
|||
}
|
||||
|
||||
/**
|
||||
* Caller must be synchronized on this.metaAvailable
|
||||
* @param metaLocation
|
||||
*/
|
||||
private void setMetaLocation(final ServerName metaLocation) {
|
||||
LOG.debug("set new cached META location: " + metaLocation);
|
||||
metaAvailable.set(true);
|
||||
this.metaLocation = metaLocation;
|
||||
// no synchronization because these are private and already under lock
|
||||
this.metaAvailable.notifyAll();
|
||||
void setMetaLocation(final ServerName metaLocation) {
|
||||
LOG.debug("Set new cached META location: " + metaLocation);
|
||||
synchronized (this.metaAvailable) {
|
||||
this.metaLocation = metaLocation;
|
||||
this.metaAvailable.set(true);
|
||||
// notifyAll() needs the metaAvailable monitor, which the enclosing synchronized block holds
|
||||
this.metaAvailable.notifyAll();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -95,11 +95,6 @@ public class TestCatalogTracker {
|
|||
this.watcher.close();
|
||||
}
|
||||
|
||||
private CatalogTracker constructAndStartCatalogTracker()
|
||||
throws IOException, InterruptedException {
|
||||
return constructAndStartCatalogTracker(null);
|
||||
}
|
||||
|
||||
private CatalogTracker constructAndStartCatalogTracker(final HConnection c)
|
||||
throws IOException, InterruptedException {
|
||||
CatalogTracker ct = new CatalogTracker(this.watcher, null, c,
|
||||
|
@ -135,57 +130,103 @@ public class TestCatalogTracker {
|
|||
*/
|
||||
@Test public void testInterruptWaitOnMetaAndRoot()
|
||||
throws IOException, InterruptedException {
|
||||
final CatalogTracker ct = constructAndStartCatalogTracker();
|
||||
ServerName hsa = ct.getRootLocation();
|
||||
Assert.assertNull(hsa);
|
||||
ServerName meta = ct.getMetaLocation();
|
||||
Assert.assertNull(meta);
|
||||
Thread t = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
ct.waitForMeta();
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException("Interrupted", e);
|
||||
HConnection connection = mockConnection(null);
|
||||
try {
|
||||
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||
ServerName hsa = ct.getRootLocation();
|
||||
Assert.assertNull(hsa);
|
||||
ServerName meta = ct.getMetaLocation();
|
||||
Assert.assertNull(meta);
|
||||
Thread t = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
ct.waitForMeta();
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException("Interrupted", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
t.start();
|
||||
while (!t.isAlive()) Threads.sleep(1);
|
||||
Threads.sleep(1);
|
||||
assertTrue(t.isAlive());
|
||||
ct.stop();
|
||||
// Join the thread... should exit shortly.
|
||||
t.join();
|
||||
};
|
||||
t.start();
|
||||
while (!t.isAlive())
|
||||
Threads.sleep(1);
|
||||
Threads.sleep(1);
|
||||
assertTrue(t.isAlive());
|
||||
ct.stop();
|
||||
// Join the thread... should exit shortly.
|
||||
t.join();
|
||||
} finally {
|
||||
HConnectionManager.deleteConnection(UTIL.getConfiguration(), true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for HBASE-4288. Throw an IOE when trying to verify meta region and
|
||||
* prove it doesn't cause master shutdown.
|
||||
* @see <a href="https://issues.apache.org/jira/browse/HBASE-4288">HBASE-4288</a>
|
||||
* @throws IOException
|
||||
* @throws InterruptedException
|
||||
* @throws KeeperException
|
||||
*/
|
||||
@Test
|
||||
public void testGetMetaServerConnectionFails()
|
||||
public void testServerNotRunningIOException()
|
||||
throws IOException, InterruptedException, KeeperException {
|
||||
HConnection connection =
|
||||
HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration());
|
||||
// Mock an HRegionInterface.
|
||||
final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
|
||||
HConnection connection = mockConnection(implementation);
|
||||
try {
|
||||
// Mock an HRegionInterface.
|
||||
final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
|
||||
// If a 'get' is called on mocked interface, throw connection refused.
|
||||
Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())).
|
||||
thenThrow(new ConnectException("Connection refused"));
|
||||
// Make it so our implementation is returned when we do a connection.
|
||||
// Need to fake out the location lookup stuff first.
|
||||
ServerName sn = new ServerName("example.com", 1234, System.currentTimeMillis());
|
||||
final HRegionLocation anyLocation =
|
||||
new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, sn.getHostname(),
|
||||
sn.getPort());
|
||||
Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(),
|
||||
(byte[]) Mockito.any(), Mockito.anyBoolean())).
|
||||
thenReturn(anyLocation);
|
||||
Mockito.when(connection.getHRegionConnection(Mockito.anyString(),
|
||||
Mockito.anyInt(), Matchers.anyBoolean())).
|
||||
thenReturn(implementation);
|
||||
// If a 'getRegionInfo' is called on mocked HRegionInterface, throw IOE
|
||||
// the first time. 'Succeed' the second time we are called.
|
||||
Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
|
||||
thenThrow(new IOException("Server not running, aborting")).
|
||||
thenReturn(new HRegionInfo());
|
||||
// After we encounter the above 'Server not running', we should catch the
|
||||
// IOE and go into retry mode for meta. We'll do gets on -ROOT- to
|
||||
// get new meta location. Return something so this 'get' succeeds
|
||||
// (here we mock up getRegionServerWithRetries, the wrapper around
|
||||
// the actual get).
|
||||
Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>)Mockito.any())).
|
||||
thenReturn(getMetaTableRowResult());
|
||||
// Now start up the catalogtracker with our doctored Connection.
|
||||
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||
try {
|
||||
RootLocationEditor.setRootLocation(this.watcher, sn);
|
||||
// Set a location for root and meta.
|
||||
RootLocationEditor.setRootLocation(this.watcher, SN);
|
||||
ct.setMetaLocation(SN);
|
||||
// Call the method that HBASE-4288 calls.
|
||||
Assert.assertFalse(ct.waitForMetaServerConnectionDefault() == null);
|
||||
} finally {
|
||||
// Clean out root and meta locations or later tests will be confused...
|
||||
// they presume start fresh in zk.
|
||||
ct.resetMetaLocation();
|
||||
RootLocationEditor.deleteRootLocation(this.watcher);
|
||||
}
|
||||
} finally {
|
||||
// Clear out our doctored connection or could mess up subsequent tests.
|
||||
HConnectionManager.deleteConnection(UTIL.getConfiguration(), true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test we survive a connection refused {@link ConnectException}
|
||||
* @throws IOException
|
||||
* @throws InterruptedException
|
||||
* @throws KeeperException
|
||||
*/
|
||||
@Test
|
||||
public void testGetMetaServerConnectionFails()
|
||||
throws IOException, InterruptedException, KeeperException {
|
||||
// Mock an HRegionInterface.
|
||||
final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
|
||||
HConnection connection = mockConnection(implementation);
|
||||
try {
|
||||
// If a 'get' is called on mocked interface, throw connection refused.
|
||||
Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())).
|
||||
thenThrow(new ConnectException("Connection refused"));
|
||||
// Now start up the catalogtracker with our doctored Connection.
|
||||
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||
try {
|
||||
RootLocationEditor.setRootLocation(this.watcher, SN);
|
||||
long timeout = UTIL.getConfiguration().
|
||||
getLong("hbase.catalog.verification.timeout", 1000);
|
||||
Assert.assertFalse(ct.verifyMetaRegionLocation(timeout));
|
||||
|
@ -234,7 +275,8 @@ public class TestCatalogTracker {
|
|||
@Test (expected = NotAllMetaRegionsOnlineException.class)
|
||||
public void testTimeoutWaitForRoot()
|
||||
throws IOException, InterruptedException {
|
||||
final CatalogTracker ct = constructAndStartCatalogTracker();
|
||||
HConnection connection = Mockito.mock(HConnection.class);
|
||||
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||
ct.waitForRoot(100);
|
||||
}
|
||||
|
||||
|
@ -259,7 +301,8 @@ public class TestCatalogTracker {
|
|||
*/
|
||||
@Test public void testNoTimeoutWaitForRoot()
|
||||
throws IOException, InterruptedException, KeeperException {
|
||||
final CatalogTracker ct = constructAndStartCatalogTracker();
|
||||
HConnection connection = Mockito.mock(HConnection.class);
|
||||
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||
ServerName hsa = ct.getRootLocation();
|
||||
Assert.assertNull(hsa);
|
||||
|
||||
|
@ -328,17 +371,7 @@ public class TestCatalogTracker {
|
|||
// like things have been getting assigned. Make it so we'll return a
|
||||
// location (no matter what the Get is). Same for getHRegionInfo -- always
|
||||
// just return the meta region.
|
||||
List<KeyValue> kvs = new ArrayList<KeyValue>();
|
||||
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
||||
HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
|
||||
Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO)));
|
||||
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
||||
HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
|
||||
Bytes.toBytes(SN.getHostAndPort())));
|
||||
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
||||
HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
|
||||
Bytes.toBytes(SN.getStartcode())));
|
||||
final Result result = new Result(kvs);
|
||||
final Result result = getMetaTableRowResult();
|
||||
Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>)Mockito.any())).
|
||||
thenReturn(result);
|
||||
Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
|
||||
|
@ -361,6 +394,57 @@ public class TestCatalogTracker {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param implementation An {@link HRegionInterface} instance; you'll likely
|
||||
* want to pass a mocked HRS; can be null.
|
||||
* @return Mock up a connection that returns a {@link Configuration} when
|
||||
* {@link HConnection#getConfiguration()} is called, a 'location' when
|
||||
* {@link HConnection#getRegionLocation(byte[], byte[], boolean)} is called,
|
||||
* and that returns the passed {@link HRegionInterface} instance when
|
||||
* {@link HConnection#getHRegionConnection(String, int)}
|
||||
* is called (Be sure to call
|
||||
* {@link HConnectionManager#deleteConnection(org.apache.hadoop.conf.Configuration, boolean)}
|
||||
* when done with this mocked Connection).
|
||||
* @throws IOException
|
||||
*/
|
||||
private HConnection mockConnection(final HRegionInterface implementation)
|
||||
throws IOException {
|
||||
HConnection connection =
|
||||
HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration());
|
||||
// Make it so we return any old location when asked.
|
||||
final HRegionLocation anyLocation =
|
||||
new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, SN.getHostname(),
|
||||
SN.getPort());
|
||||
Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(),
|
||||
(byte[]) Mockito.any(), Mockito.anyBoolean())).
|
||||
thenReturn(anyLocation);
|
||||
if (implementation != null) {
|
||||
// If a call to getHRegionConnection, return this implementation.
|
||||
Mockito.when(connection.getHRegionConnection(Mockito.anyString(), Mockito.anyInt())).
|
||||
thenReturn(implementation);
|
||||
}
|
||||
return connection;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A mocked up Result that fakes a Get on a row in the
|
||||
* <code>.META.</code> table.
|
||||
* @throws IOException
|
||||
*/
|
||||
private Result getMetaTableRowResult() throws IOException {
|
||||
List<KeyValue> kvs = new ArrayList<KeyValue>();
|
||||
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
||||
HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
|
||||
Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO)));
|
||||
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
||||
HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
|
||||
Bytes.toBytes(SN.getHostAndPort())));
|
||||
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
||||
HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
|
||||
Bytes.toBytes(SN.getStartcode())));
|
||||
return new Result(kvs);
|
||||
}
|
||||
|
||||
private void startWaitAliveThenWaitItLives(final Thread t, final int ms) {
|
||||
t.start();
|
||||
while(!t.isAlive()) {
|
||||
|
|
Loading…
Reference in New Issue