HBASE-4288 "Server not running" exception during meta verification causes RS abort
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1200648 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
04afdf9633
commit
274e7c2743
|
@ -773,6 +773,7 @@ Release 0.92.0 - Unreleased
|
||||||
(Jonathan Hsieh)
|
(Jonathan Hsieh)
|
||||||
HBASE-4714 Don't ship w/ icms enabled by default
|
HBASE-4714 Don't ship w/ icms enabled by default
|
||||||
HBASE-4747 Upgrade maven surefire plugin to 2.10
|
HBASE-4747 Upgrade maven surefire plugin to 2.10
|
||||||
|
HBASE-4288 "Server not running" exception during meta verification causes RS abort
|
||||||
|
|
||||||
NEW FEATURES
|
NEW FEATURES
|
||||||
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via
|
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via
|
||||||
|
|
|
@ -306,7 +306,7 @@ public class CatalogTracker {
|
||||||
* @throws InterruptedException
|
* @throws InterruptedException
|
||||||
* @throws NotAllMetaRegionsOnlineException if timed out waiting
|
* @throws NotAllMetaRegionsOnlineException if timed out waiting
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @deprecated Use {@link #getRootServerConnection(long)}
|
* @deprecated Use #getRootServerConnection(long)
|
||||||
*/
|
*/
|
||||||
public HRegionInterface waitForRootServerConnection(long timeout)
|
public HRegionInterface waitForRootServerConnection(long timeout)
|
||||||
throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
|
throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
|
||||||
|
@ -383,19 +383,18 @@ public class CatalogTracker {
|
||||||
// which we have to wait on.
|
// which we have to wait on.
|
||||||
ServerName newLocation =
|
ServerName newLocation =
|
||||||
MetaReader.readRegionLocation(this, META_REGION_NAME);
|
MetaReader.readRegionLocation(this, META_REGION_NAME);
|
||||||
if (newLocation == null) {
|
if (newLocation == null) return null;
|
||||||
LOG.debug(".META. server unavailable.");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
HRegionInterface newConnection = getCachedConnection(newLocation);
|
HRegionInterface newConnection = getCachedConnection(newLocation);
|
||||||
if (verifyRegionLocation(newConnection, newLocation, META_REGION_NAME)) {
|
if (verifyRegionLocation(newConnection, newLocation, META_REGION_NAME)) {
|
||||||
setMetaLocation(newLocation);
|
setMetaLocation(newLocation);
|
||||||
return newConnection;
|
return newConnection;
|
||||||
} else {
|
} else {
|
||||||
LOG.debug("new .META. server: " + newLocation + " isn't valid." +
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("New .META. server: " + newLocation + " isn't valid." +
|
||||||
" Cached .META. server: " + this.metaLocation);
|
" Cached .META. server: " + this.metaLocation);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -412,7 +411,7 @@ public class CatalogTracker {
|
||||||
try {
|
try {
|
||||||
if (waitForMeta(100) != null) break;
|
if (waitForMeta(100) != null) break;
|
||||||
} catch (NotAllMetaRegionsOnlineException e) {
|
} catch (NotAllMetaRegionsOnlineException e) {
|
||||||
LOG.info("Retrying", e);
|
if (LOG.isTraceEnabled()) LOG.trace("Retrying", e);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.info("Retrying", e);
|
LOG.info("Retrying", e);
|
||||||
}
|
}
|
||||||
|
@ -436,7 +435,7 @@ public class CatalogTracker {
|
||||||
public ServerName waitForMeta(long timeout)
|
public ServerName waitForMeta(long timeout)
|
||||||
throws InterruptedException, IOException, NotAllMetaRegionsOnlineException {
|
throws InterruptedException, IOException, NotAllMetaRegionsOnlineException {
|
||||||
long stop = System.currentTimeMillis() + timeout;
|
long stop = System.currentTimeMillis() + timeout;
|
||||||
long waitTime = Math.min(500, timeout);
|
long waitTime = Math.min(50, timeout);
|
||||||
synchronized (metaAvailable) {
|
synchronized (metaAvailable) {
|
||||||
while(!stopped && (timeout == 0 || System.currentTimeMillis() < stop)) {
|
while(!stopped && (timeout == 0 || System.currentTimeMillis() < stop)) {
|
||||||
if (getMetaServerConnection() != null) {
|
if (getMetaServerConnection() != null) {
|
||||||
|
@ -499,16 +498,17 @@ public class CatalogTracker {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Caller must be synchronized on this.metaAvailable
|
|
||||||
* @param metaLocation
|
* @param metaLocation
|
||||||
*/
|
*/
|
||||||
private void setMetaLocation(final ServerName metaLocation) {
|
void setMetaLocation(final ServerName metaLocation) {
|
||||||
LOG.debug("set new cached META location: " + metaLocation);
|
LOG.debug("Set new cached META location: " + metaLocation);
|
||||||
metaAvailable.set(true);
|
synchronized (this.metaAvailable) {
|
||||||
this.metaLocation = metaLocation;
|
this.metaLocation = metaLocation;
|
||||||
|
this.metaAvailable.set(true);
|
||||||
// no synchronization because these are private and already under lock
|
// no synchronization because these are private and already under lock
|
||||||
this.metaAvailable.notifyAll();
|
this.metaAvailable.notifyAll();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param sn ServerName to get a connection against.
|
* @param sn ServerName to get a connection against.
|
||||||
|
|
|
@ -95,11 +95,6 @@ public class TestCatalogTracker {
|
||||||
this.watcher.close();
|
this.watcher.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private CatalogTracker constructAndStartCatalogTracker()
|
|
||||||
throws IOException, InterruptedException {
|
|
||||||
return constructAndStartCatalogTracker(null);
|
|
||||||
}
|
|
||||||
|
|
||||||
private CatalogTracker constructAndStartCatalogTracker(final HConnection c)
|
private CatalogTracker constructAndStartCatalogTracker(final HConnection c)
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
CatalogTracker ct = new CatalogTracker(this.watcher, null, c,
|
CatalogTracker ct = new CatalogTracker(this.watcher, null, c,
|
||||||
|
@ -135,7 +130,9 @@ public class TestCatalogTracker {
|
||||||
*/
|
*/
|
||||||
@Test public void testInterruptWaitOnMetaAndRoot()
|
@Test public void testInterruptWaitOnMetaAndRoot()
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
final CatalogTracker ct = constructAndStartCatalogTracker();
|
HConnection connection = mockConnection(null);
|
||||||
|
try {
|
||||||
|
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||||
ServerName hsa = ct.getRootLocation();
|
ServerName hsa = ct.getRootLocation();
|
||||||
Assert.assertNull(hsa);
|
Assert.assertNull(hsa);
|
||||||
ServerName meta = ct.getMetaLocation();
|
ServerName meta = ct.getMetaLocation();
|
||||||
|
@ -151,41 +148,85 @@ public class TestCatalogTracker {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
t.start();
|
t.start();
|
||||||
while (!t.isAlive()) Threads.sleep(1);
|
while (!t.isAlive())
|
||||||
|
Threads.sleep(1);
|
||||||
Threads.sleep(1);
|
Threads.sleep(1);
|
||||||
assertTrue(t.isAlive());
|
assertTrue(t.isAlive());
|
||||||
ct.stop();
|
ct.stop();
|
||||||
// Join the thread... should exit shortly.
|
// Join the thread... should exit shortly.
|
||||||
t.join();
|
t.join();
|
||||||
|
} finally {
|
||||||
|
HConnectionManager.deleteConnection(UTIL.getConfiguration(), true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for HBASE-4288. Throw an IOE when trying to verify meta region and
|
||||||
|
* prove it doesn't cause master shutdown.
|
||||||
|
* @see <a href="https://issues.apache.org/jira/browse/HBASE-4288">HBASE-4288</a>
|
||||||
|
* @throws IOException
|
||||||
|
* @throws InterruptedException
|
||||||
|
* @throws KeeperException
|
||||||
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testGetMetaServerConnectionFails()
|
public void testServerNotRunningIOException()
|
||||||
throws IOException, InterruptedException, KeeperException {
|
throws IOException, InterruptedException, KeeperException {
|
||||||
HConnection connection =
|
|
||||||
HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration());
|
|
||||||
try {
|
|
||||||
// Mock an HRegionInterface.
|
// Mock an HRegionInterface.
|
||||||
final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
|
final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
|
||||||
// If a 'get' is called on mocked interface, throw connection refused.
|
HConnection connection = mockConnection(implementation);
|
||||||
Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())).
|
try {
|
||||||
thenThrow(new ConnectException("Connection refused"));
|
// If a 'getRegionInfo' is called on mocked HRegionInterface, throw IOE
|
||||||
// Make it so our implementation is returned when we do a connection.
|
// the first time. 'Succeed' the second time we are called.
|
||||||
// Need to fake out the location lookup stuff first.
|
Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
|
||||||
ServerName sn = new ServerName("example.com", 1234, System.currentTimeMillis());
|
thenThrow(new IOException("Server not running, aborting")).
|
||||||
final HRegionLocation anyLocation =
|
thenReturn(new HRegionInfo());
|
||||||
new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, sn.getHostname(),
|
// After we encounter the above 'Server not running', we should catch the
|
||||||
sn.getPort());
|
// IOE and go into retrying for the meta mode. We'll do gets on -ROOT- to
|
||||||
Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(),
|
// get new meta location. Return something so this 'get' succeeds
|
||||||
(byte[]) Mockito.any(), Mockito.anyBoolean())).
|
// (here we mock up getRegionServerWithRetries, the wrapper around
|
||||||
thenReturn(anyLocation);
|
// the actual get).
|
||||||
Mockito.when(connection.getHRegionConnection(Mockito.anyString(),
|
Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>)Mockito.any())).
|
||||||
Mockito.anyInt(), Matchers.anyBoolean())).
|
thenReturn(getMetaTableRowResult());
|
||||||
thenReturn(implementation);
|
|
||||||
// Now start up the catalogtracker with our doctored Connection.
|
// Now start up the catalogtracker with our doctored Connection.
|
||||||
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||||
try {
|
try {
|
||||||
RootLocationEditor.setRootLocation(this.watcher, sn);
|
// Set a location for root and meta.
|
||||||
|
RootLocationEditor.setRootLocation(this.watcher, SN);
|
||||||
|
ct.setMetaLocation(SN);
|
||||||
|
// Call the method that HBASE-4288 calls.
|
||||||
|
Assert.assertFalse(ct.waitForMetaServerConnectionDefault() == null);
|
||||||
|
} finally {
|
||||||
|
// Clean out root and meta locations or later tests will be confused...
|
||||||
|
// they presume start fresh in zk.
|
||||||
|
ct.resetMetaLocation();
|
||||||
|
RootLocationEditor.deleteRootLocation(this.watcher);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
// Clear out our doctored connection or could mess up subsequent tests.
|
||||||
|
HConnectionManager.deleteConnection(UTIL.getConfiguration(), true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test we survive a connection refused {@link ConnectException}
|
||||||
|
* @throws IOException
|
||||||
|
* @throws InterruptedException
|
||||||
|
* @throws KeeperException
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testGetMetaServerConnectionFails()
|
||||||
|
throws IOException, InterruptedException, KeeperException {
|
||||||
|
// Mock an HRegionInterface.
|
||||||
|
final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
|
||||||
|
HConnection connection = mockConnection(implementation);
|
||||||
|
try {
|
||||||
|
// If a 'get' is called on mocked interface, throw connection refused.
|
||||||
|
Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())).
|
||||||
|
thenThrow(new ConnectException("Connection refused"));
|
||||||
|
// Now start up the catalogtracker with our doctored Connection.
|
||||||
|
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||||
|
try {
|
||||||
|
RootLocationEditor.setRootLocation(this.watcher, SN);
|
||||||
long timeout = UTIL.getConfiguration().
|
long timeout = UTIL.getConfiguration().
|
||||||
getLong("hbase.catalog.verification.timeout", 1000);
|
getLong("hbase.catalog.verification.timeout", 1000);
|
||||||
Assert.assertFalse(ct.verifyMetaRegionLocation(timeout));
|
Assert.assertFalse(ct.verifyMetaRegionLocation(timeout));
|
||||||
|
@ -234,7 +275,8 @@ public class TestCatalogTracker {
|
||||||
@Test (expected = NotAllMetaRegionsOnlineException.class)
|
@Test (expected = NotAllMetaRegionsOnlineException.class)
|
||||||
public void testTimeoutWaitForRoot()
|
public void testTimeoutWaitForRoot()
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
final CatalogTracker ct = constructAndStartCatalogTracker();
|
HConnection connection = Mockito.mock(HConnection.class);
|
||||||
|
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||||
ct.waitForRoot(100);
|
ct.waitForRoot(100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -259,7 +301,8 @@ public class TestCatalogTracker {
|
||||||
*/
|
*/
|
||||||
@Test public void testNoTimeoutWaitForRoot()
|
@Test public void testNoTimeoutWaitForRoot()
|
||||||
throws IOException, InterruptedException, KeeperException {
|
throws IOException, InterruptedException, KeeperException {
|
||||||
final CatalogTracker ct = constructAndStartCatalogTracker();
|
HConnection connection = Mockito.mock(HConnection.class);
|
||||||
|
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
|
||||||
ServerName hsa = ct.getRootLocation();
|
ServerName hsa = ct.getRootLocation();
|
||||||
Assert.assertNull(hsa);
|
Assert.assertNull(hsa);
|
||||||
|
|
||||||
|
@ -328,17 +371,7 @@ public class TestCatalogTracker {
|
||||||
// like things have been getting assigned. Make it so we'll return a
|
// like things have been getting assigned. Make it so we'll return a
|
||||||
// location (no matter what the Get is). Same for getHRegionInfo -- always
|
// location (no matter what the Get is). Same for getHRegionInfo -- always
|
||||||
// just return the meta region.
|
// just return the meta region.
|
||||||
List<KeyValue> kvs = new ArrayList<KeyValue>();
|
final Result result = getMetaTableRowResult();
|
||||||
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
|
||||||
HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
|
|
||||||
Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO)));
|
|
||||||
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
|
||||||
HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
|
|
||||||
Bytes.toBytes(SN.getHostAndPort())));
|
|
||||||
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
|
||||||
HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
|
|
||||||
Bytes.toBytes(SN.getStartcode())));
|
|
||||||
final Result result = new Result(kvs);
|
|
||||||
Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>)Mockito.any())).
|
Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>)Mockito.any())).
|
||||||
thenReturn(result);
|
thenReturn(result);
|
||||||
Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
|
Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
|
||||||
|
@ -361,6 +394,57 @@ public class TestCatalogTracker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param implementation An {@link HRegionInterface} instance; you'll likely
|
||||||
|
* want to pass a mocked HRS; can be null.
|
||||||
|
* @return Mock up a connection that returns a {@link Configuration} when
|
||||||
|
* {@link HConnection#getConfiguration()} is called, a 'location' when
|
||||||
|
* {@link HConnection#getRegionLocation(byte[], byte[], boolean)} is called,
|
||||||
|
* and that returns the passed {@link HRegionInterface} instance when
|
||||||
|
* {@link HConnection#getHRegionConnection(String, int)}
|
||||||
|
* is called (Be sure to call
|
||||||
|
* {@link HConnectionManager#deleteConnection(org.apache.hadoop.conf.Configuration, boolean)}
|
||||||
|
* when done with this mocked Connection).
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private HConnection mockConnection(final HRegionInterface implementation)
|
||||||
|
throws IOException {
|
||||||
|
HConnection connection =
|
||||||
|
HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration());
|
||||||
|
// Make it so we return any old location when asked.
|
||||||
|
final HRegionLocation anyLocation =
|
||||||
|
new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, SN.getHostname(),
|
||||||
|
SN.getPort());
|
||||||
|
Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(),
|
||||||
|
(byte[]) Mockito.any(), Mockito.anyBoolean())).
|
||||||
|
thenReturn(anyLocation);
|
||||||
|
if (implementation != null) {
|
||||||
|
// If a call to getHRegionConnection, return this implementation.
|
||||||
|
Mockito.when(connection.getHRegionConnection(Mockito.anyString(), Mockito.anyInt())).
|
||||||
|
thenReturn(implementation);
|
||||||
|
}
|
||||||
|
return connection;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return A mocked up Result that fakes a Get on a row in the
|
||||||
|
* <code>.META.</code> table.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private Result getMetaTableRowResult() throws IOException {
|
||||||
|
List<KeyValue> kvs = new ArrayList<KeyValue>();
|
||||||
|
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
||||||
|
HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
|
||||||
|
Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO)));
|
||||||
|
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
||||||
|
HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
|
||||||
|
Bytes.toBytes(SN.getHostAndPort())));
|
||||||
|
kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
|
||||||
|
HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
|
||||||
|
Bytes.toBytes(SN.getStartcode())));
|
||||||
|
return new Result(kvs);
|
||||||
|
}
|
||||||
|
|
||||||
private void startWaitAliveThenWaitItLives(final Thread t, final int ms) {
|
private void startWaitAliveThenWaitItLives(final Thread t, final int ms) {
|
||||||
t.start();
|
t.start();
|
||||||
while(!t.isAlive()) {
|
while(!t.isAlive()) {
|
||||||
|
|
Loading…
Reference in New Issue