diff --git a/CHANGES.txt b/CHANGES.txt
index b93fec1db5a..e9ef89ba9ba 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -773,6 +773,7 @@ Release 0.92.0 - Unreleased
(Jonathan Hsieh)
HBASE-4714 Don't ship w/ icms enabled by default
HBASE-4747 Upgrade maven surefire plugin to 2.10
+ HBASE-4288 "Server not running" exception during meta verification causes RS abort
NEW FEATURES
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via
diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java b/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
index d51efb928ed..c756e9aa863 100644
--- a/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
+++ b/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
@@ -306,7 +306,7 @@ public class CatalogTracker {
* @throws InterruptedException
* @throws NotAllMetaRegionsOnlineException if timed out waiting
* @throws IOException
- * @deprecated Use {@link #getRootServerConnection(long)}
+ * @deprecated Use #getRootServerConnection(long)
*/
public HRegionInterface waitForRootServerConnection(long timeout)
throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
@@ -383,18 +383,17 @@ public class CatalogTracker {
// which we have to wait on.
ServerName newLocation =
MetaReader.readRegionLocation(this, META_REGION_NAME);
- if (newLocation == null) {
- LOG.debug(".META. server unavailable.");
- return null;
- }
+ if (newLocation == null) return null;
HRegionInterface newConnection = getCachedConnection(newLocation);
if (verifyRegionLocation(newConnection, newLocation, META_REGION_NAME)) {
setMetaLocation(newLocation);
return newConnection;
} else {
- LOG.debug("new .META. server: " + newLocation + " isn't valid." +
- " Cached .META. server: " + this.metaLocation);
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("New .META. server: " + newLocation + " isn't valid." +
+ " Cached .META. server: " + this.metaLocation);
+ }
}
return null;
}
@@ -412,7 +411,7 @@ public class CatalogTracker {
try {
if (waitForMeta(100) != null) break;
} catch (NotAllMetaRegionsOnlineException e) {
- LOG.info("Retrying", e);
+ if (LOG.isTraceEnabled()) LOG.trace("Retrying", e);
} catch (IOException e) {
LOG.info("Retrying", e);
}
@@ -436,7 +435,7 @@ public class CatalogTracker {
public ServerName waitForMeta(long timeout)
throws InterruptedException, IOException, NotAllMetaRegionsOnlineException {
long stop = System.currentTimeMillis() + timeout;
- long waitTime = Math.min(500, timeout);
+ long waitTime = Math.min(50, timeout);
synchronized (metaAvailable) {
while(!stopped && (timeout == 0 || System.currentTimeMillis() < stop)) {
if (getMetaServerConnection() != null) {
@@ -499,15 +498,16 @@ public class CatalogTracker {
}
/**
- * Caller must be synchronized on this.metaAvailable
* @param metaLocation
*/
- private void setMetaLocation(final ServerName metaLocation) {
- LOG.debug("set new cached META location: " + metaLocation);
- metaAvailable.set(true);
- this.metaLocation = metaLocation;
- // no synchronization because these are private and already under lock
- this.metaAvailable.notifyAll();
+ void setMetaLocation(final ServerName metaLocation) {
+ LOG.debug("Set new cached META location: " + metaLocation);
+ synchronized (this.metaAvailable) {
+ this.metaLocation = metaLocation;
+ this.metaAvailable.set(true);
+ // notifyAll() needs the metaAvailable monitor; the enclosing synchronized block holds it
+ this.metaAvailable.notifyAll();
+ }
}
/**
diff --git a/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java b/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
index 8fb391276ad..ae3150526d4 100644
--- a/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
+++ b/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
@@ -95,11 +95,6 @@ public class TestCatalogTracker {
this.watcher.close();
}
- private CatalogTracker constructAndStartCatalogTracker()
- throws IOException, InterruptedException {
- return constructAndStartCatalogTracker(null);
- }
-
private CatalogTracker constructAndStartCatalogTracker(final HConnection c)
throws IOException, InterruptedException {
CatalogTracker ct = new CatalogTracker(this.watcher, null, c,
@@ -135,57 +130,103 @@ public class TestCatalogTracker {
*/
@Test public void testInterruptWaitOnMetaAndRoot()
throws IOException, InterruptedException {
- final CatalogTracker ct = constructAndStartCatalogTracker();
- ServerName hsa = ct.getRootLocation();
- Assert.assertNull(hsa);
- ServerName meta = ct.getMetaLocation();
- Assert.assertNull(meta);
- Thread t = new Thread() {
- @Override
- public void run() {
- try {
- ct.waitForMeta();
- } catch (InterruptedException e) {
- throw new RuntimeException("Interrupted", e);
+ HConnection connection = mockConnection(null);
+ try {
+ final CatalogTracker ct = constructAndStartCatalogTracker(connection);
+ ServerName hsa = ct.getRootLocation();
+ Assert.assertNull(hsa);
+ ServerName meta = ct.getMetaLocation();
+ Assert.assertNull(meta);
+ Thread t = new Thread() {
+ @Override
+ public void run() {
+ try {
+ ct.waitForMeta();
+ } catch (InterruptedException e) {
+ throw new RuntimeException("Interrupted", e);
+ }
}
- }
- };
- t.start();
- while (!t.isAlive()) Threads.sleep(1);
- Threads.sleep(1);
- assertTrue(t.isAlive());
- ct.stop();
- // Join the thread... should exit shortly.
- t.join();
+ };
+ t.start();
+ while (!t.isAlive())
+ Threads.sleep(1);
+ Threads.sleep(1);
+ assertTrue(t.isAlive());
+ ct.stop();
+ // Join the thread... should exit shortly.
+ t.join();
+ } finally {
+ HConnectionManager.deleteConnection(UTIL.getConfiguration(), true);
+ }
}
+ /**
+ * Test for HBASE-4288. Throw an IOE when trying to verify meta region and
+ * prove it doesn't cause master shutdown.
+ * @see <a href="https://issues.apache.org/jira/browse/HBASE-4288">HBASE-4288</a>
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws KeeperException
+ */
@Test
- public void testGetMetaServerConnectionFails()
+ public void testServerNotRunningIOException()
throws IOException, InterruptedException, KeeperException {
- HConnection connection =
- HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration());
+ // Mock an HRegionInterface.
+ final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
+ HConnection connection = mockConnection(implementation);
try {
- // Mock an HRegionInterface.
- final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
- // If a 'get' is called on mocked interface, throw connection refused.
- Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())).
- thenThrow(new ConnectException("Connection refused"));
- // Make it so our implementation is returned when we do a connection.
- // Need to fake out the location lookup stuff first.
- ServerName sn = new ServerName("example.com", 1234, System.currentTimeMillis());
- final HRegionLocation anyLocation =
- new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, sn.getHostname(),
- sn.getPort());
- Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(),
- (byte[]) Mockito.any(), Mockito.anyBoolean())).
- thenReturn(anyLocation);
- Mockito.when(connection.getHRegionConnection(Mockito.anyString(),
- Mockito.anyInt(), Matchers.anyBoolean())).
- thenReturn(implementation);
+ // If a 'getRegionInfo' is called on mocked HRegionInterface, throw IOE
+ // the first time. 'Succeed' the second time we are called.
+ Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
+ thenThrow(new IOException("Server not running, aborting")).
+ thenReturn(new HRegionInfo());
+ // After we encounter the above 'Server not running', we should catch the
+ // IOE and go into retry mode for meta. We'll do gets on -ROOT- to
+ // get new meta location. Return something so this 'get' succeeds
+ // (here we mock up getRegionServerWithRetries, the wrapper around
+ // the actual get).
+ Mockito.when(connection.getRegionServerWithRetries((ServerCallable)Mockito.any())).
+ thenReturn(getMetaTableRowResult());
// Now start up the catalogtracker with our doctored Connection.
final CatalogTracker ct = constructAndStartCatalogTracker(connection);
try {
- RootLocationEditor.setRootLocation(this.watcher, sn);
+ // Set a location for root and meta.
+ RootLocationEditor.setRootLocation(this.watcher, SN);
+ ct.setMetaLocation(SN);
+ // Call the method that HBASE-4288 calls.
+ Assert.assertFalse(ct.waitForMetaServerConnectionDefault() == null);
+ } finally {
+ // Clean out root and meta locations or later tests will be confused...
+ // they presume start fresh in zk.
+ ct.resetMetaLocation();
+ RootLocationEditor.deleteRootLocation(this.watcher);
+ }
+ } finally {
+ // Clear out our doctored connection or could mess up subsequent tests.
+ HConnectionManager.deleteConnection(UTIL.getConfiguration(), true);
+ }
+ }
+
+ /**
+ * Test we survive a connection refused {@link ConnectException}
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws KeeperException
+ */
+ @Test
+ public void testGetMetaServerConnectionFails()
+ throws IOException, InterruptedException, KeeperException {
+ // Mock an HRegionInterface.
+ final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
+ HConnection connection = mockConnection(implementation);
+ try {
+ // If a 'get' is called on mocked interface, throw connection refused.
+ Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())).
+ thenThrow(new ConnectException("Connection refused"));
+ // Now start up the catalogtracker with our doctored Connection.
+ final CatalogTracker ct = constructAndStartCatalogTracker(connection);
+ try {
+ RootLocationEditor.setRootLocation(this.watcher, SN);
long timeout = UTIL.getConfiguration().
getLong("hbase.catalog.verification.timeout", 1000);
Assert.assertFalse(ct.verifyMetaRegionLocation(timeout));
@@ -234,7 +275,8 @@ public class TestCatalogTracker {
@Test (expected = NotAllMetaRegionsOnlineException.class)
public void testTimeoutWaitForRoot()
throws IOException, InterruptedException {
- final CatalogTracker ct = constructAndStartCatalogTracker();
+ HConnection connection = Mockito.mock(HConnection.class);
+ final CatalogTracker ct = constructAndStartCatalogTracker(connection);
ct.waitForRoot(100);
}
@@ -259,7 +301,8 @@ public class TestCatalogTracker {
*/
@Test public void testNoTimeoutWaitForRoot()
throws IOException, InterruptedException, KeeperException {
- final CatalogTracker ct = constructAndStartCatalogTracker();
+ HConnection connection = Mockito.mock(HConnection.class);
+ final CatalogTracker ct = constructAndStartCatalogTracker(connection);
ServerName hsa = ct.getRootLocation();
Assert.assertNull(hsa);
@@ -328,17 +371,7 @@ public class TestCatalogTracker {
// like things have been getting assigned. Make it so we'll return a
// location (no matter what the Get is). Same for getHRegionInfo -- always
// just return the meta region.
- List kvs = new ArrayList();
- kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
- HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
- Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO)));
- kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
- HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
- Bytes.toBytes(SN.getHostAndPort())));
- kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
- HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
- Bytes.toBytes(SN.getStartcode())));
- final Result result = new Result(kvs);
+ final Result result = getMetaTableRowResult();
Mockito.when(connection.getRegionServerWithRetries((ServerCallable)Mockito.any())).
thenReturn(result);
Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())).
@@ -361,6 +394,57 @@ public class TestCatalogTracker {
}
}
+ /**
+ * @param implementation An {@link HRegionInterface} instance; you'll likely
+ * want to pass a mocked HRS; can be null.
+ * @return Mock up a connection that returns a {@link Configuration} when
+ * {@link HConnection#getConfiguration()} is called, a 'location' when
+ * {@link HConnection#getRegionLocation(byte[], byte[], boolean)} is called,
+ * and that returns the passed {@link HRegionInterface} instance when
+ * {@link HConnection#getHRegionConnection(String, int)}
+ * is called (Be sure to call
+ * {@link HConnectionManager#deleteConnection(org.apache.hadoop.conf.Configuration, boolean)}
+ * when done with this mocked Connection).
+ * @throws IOException
+ */
+ private HConnection mockConnection(final HRegionInterface implementation)
+ throws IOException {
+ HConnection connection =
+ HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration());
+ // Make it so we return any old location when asked.
+ final HRegionLocation anyLocation =
+ new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, SN.getHostname(),
+ SN.getPort());
+ Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(),
+ (byte[]) Mockito.any(), Mockito.anyBoolean())).
+ thenReturn(anyLocation);
+ if (implementation != null) {
+ // If a call to getHRegionConnection, return this implementation.
+ Mockito.when(connection.getHRegionConnection(Mockito.anyString(), Mockito.anyInt())).
+ thenReturn(implementation);
+ }
+ return connection;
+ }
+
+ /**
+ * Builds a row whose cells carry FIRST_META_REGIONINFO plus SN's host:port and start code.
+ * @return A mocked up Result that fakes a Get on a row in the
+ * .META. table.
+ * @throws IOException
+ */
+ private Result getMetaTableRowResult() throws IOException {
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+ kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
+ HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
+ Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO)));
+ kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
+ HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
+ Bytes.toBytes(SN.getHostAndPort())));
+ kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY,
+ HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
+ Bytes.toBytes(SN.getStartcode())));
+ return new Result(kvs);
+ }
+
private void startWaitAliveThenWaitItLives(final Thread t, final int ms) {
t.start();
while(!t.isAlive()) {