diff --git a/CHANGES.txt b/CHANGES.txt index b93fec1db5a..e9ef89ba9ba 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -773,6 +773,7 @@ Release 0.92.0 - Unreleased (Jonathan Hsieh) HBASE-4714 Don't ship w/ icms enabled by default HBASE-4747 Upgrade maven surefire plugin to 2.10 + HBASE-4288 "Server not running" exception during meta verification causes RS abort NEW FEATURES HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java b/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java index d51efb928ed..c756e9aa863 100644 --- a/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java +++ b/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java @@ -306,7 +306,7 @@ public class CatalogTracker { * @throws InterruptedException * @throws NotAllMetaRegionsOnlineException if timed out waiting * @throws IOException - * @deprecated Use {@link #getRootServerConnection(long)} + * @deprecated Use #getRootServerConnection(long) */ public HRegionInterface waitForRootServerConnection(long timeout) throws InterruptedException, NotAllMetaRegionsOnlineException, IOException { @@ -383,18 +383,17 @@ public class CatalogTracker { // which we have to wait on. ServerName newLocation = MetaReader.readRegionLocation(this, META_REGION_NAME); - if (newLocation == null) { - LOG.debug(".META. server unavailable."); - return null; - } + if (newLocation == null) return null; HRegionInterface newConnection = getCachedConnection(newLocation); if (verifyRegionLocation(newConnection, newLocation, META_REGION_NAME)) { setMetaLocation(newLocation); return newConnection; } else { - LOG.debug("new .META. server: " + newLocation + " isn't valid." + - " Cached .META. server: " + this.metaLocation); + if (LOG.isTraceEnabled()) { + LOG.trace("New .META. server: " + newLocation + " isn't valid." + + " Cached .META. server: " + this.metaLocation); + } } return null; } @@ -412,7 +411,7 @@ public class CatalogTracker { try { if (waitForMeta(100) != null) break; } catch (NotAllMetaRegionsOnlineException e) { - LOG.info("Retrying", e); + if (LOG.isTraceEnabled()) LOG.trace("Retrying", e); } catch (IOException e) { LOG.info("Retrying", e); } @@ -436,7 +435,7 @@ public class CatalogTracker { public ServerName waitForMeta(long timeout) throws InterruptedException, IOException, NotAllMetaRegionsOnlineException { long stop = System.currentTimeMillis() + timeout; - long waitTime = Math.min(500, timeout); + long waitTime = Math.min(50, timeout); synchronized (metaAvailable) { while(!stopped && (timeout == 0 || System.currentTimeMillis() < stop)) { if (getMetaServerConnection() != null) { @@ -499,15 +498,16 @@ public class CatalogTracker { } /** - * Caller must be synchronized on this.metaAvailable * @param metaLocation */ - private void setMetaLocation(final ServerName metaLocation) { - LOG.debug("set new cached META location: " + metaLocation); - metaAvailable.set(true); - this.metaLocation = metaLocation; - // no synchronization because these are private and already under lock - this.metaAvailable.notifyAll(); + void setMetaLocation(final ServerName metaLocation) { + LOG.debug("Set new cached META location: " + metaLocation); + synchronized (this.metaAvailable) { + this.metaLocation = metaLocation; + this.metaAvailable.set(true); + // no synchronization because these are private and already under lock + this.metaAvailable.notifyAll(); + } } /** diff --git a/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java b/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java index 8fb391276ad..ae3150526d4 100644 --- a/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java +++ b/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java @@ -95,11 +95,6 @@ public class TestCatalogTracker { this.watcher.close(); } - private CatalogTracker constructAndStartCatalogTracker() - throws IOException, InterruptedException { - return constructAndStartCatalogTracker(null); - } - private CatalogTracker constructAndStartCatalogTracker(final HConnection c) throws IOException, InterruptedException { CatalogTracker ct = new CatalogTracker(this.watcher, null, c, @@ -135,57 +130,103 @@ public class TestCatalogTracker { */ @Test public void testInterruptWaitOnMetaAndRoot() throws IOException, InterruptedException { - final CatalogTracker ct = constructAndStartCatalogTracker(); - ServerName hsa = ct.getRootLocation(); - Assert.assertNull(hsa); - ServerName meta = ct.getMetaLocation(); - Assert.assertNull(meta); - Thread t = new Thread() { - @Override - public void run() { - try { - ct.waitForMeta(); - } catch (InterruptedException e) { - throw new RuntimeException("Interrupted", e); + HConnection connection = mockConnection(null); + try { + final CatalogTracker ct = constructAndStartCatalogTracker(connection); + ServerName hsa = ct.getRootLocation(); + Assert.assertNull(hsa); + ServerName meta = ct.getMetaLocation(); + Assert.assertNull(meta); + Thread t = new Thread() { + @Override + public void run() { + try { + ct.waitForMeta(); + } catch (InterruptedException e) { + throw new RuntimeException("Interrupted", e); + } } - } - }; - t.start(); - while (!t.isAlive()) Threads.sleep(1); - Threads.sleep(1); - assertTrue(t.isAlive()); - ct.stop(); - // Join the thread... should exit shortly. - t.join(); + }; + t.start(); + while (!t.isAlive()) + Threads.sleep(1); + Threads.sleep(1); + assertTrue(t.isAlive()); + ct.stop(); + // Join the thread... should exit shortly. + t.join(); + } finally { + HConnectionManager.deleteConnection(UTIL.getConfiguration(), true); + } } + /** + * Test for HBASE-4288. Throw an IOE when trying to verify meta region and + * prove it doesn't cause master shutdown. + * @see HBASE-4288 + * @throws IOException + * @throws InterruptedException + * @throws KeeperException + */ @Test - public void testGetMetaServerConnectionFails() + public void testServerNotRunningIOException() throws IOException, InterruptedException, KeeperException { - HConnection connection = - HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration()); + // Mock an HRegionInterface. + final HRegionInterface implementation = Mockito.mock(HRegionInterface.class); + HConnection connection = mockConnection(implementation); try { - // Mock an HRegionInterface. - final HRegionInterface implementation = Mockito.mock(HRegionInterface.class); - // If a 'get' is called on mocked interface, throw connection refused. - Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())). - thenThrow(new ConnectException("Connection refused")); - // Make it so our implementation is returned when we do a connection. - // Need to fake out the location lookup stuff first. - ServerName sn = new ServerName("example.com", 1234, System.currentTimeMillis()); - final HRegionLocation anyLocation = - new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, sn.getHostname(), - sn.getPort()); - Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(), - (byte[]) Mockito.any(), Mockito.anyBoolean())). - thenReturn(anyLocation); - Mockito.when(connection.getHRegionConnection(Mockito.anyString(), - Mockito.anyInt(), Matchers.anyBoolean())). - thenReturn(implementation); + // If a 'getRegionInfo' is called on mocked HRegionInterface, throw IOE + // the first time. 'Succeed' the second time we are called. + Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())). + thenThrow(new IOException("Server not running, aborting")). + thenReturn(new HRegionInfo()); + // After we encounter the above 'Server not running', we should catch the + // IOE and go into retrying for the meta mode. We'll do gets on -ROOT- to + // get new meta location. Return something so this 'get' succeeds + // (here we mock up getRegionServerWithRetries, the wrapper around + // the actual get). + Mockito.when(connection.getRegionServerWithRetries((ServerCallable)Mockito.any())). + thenReturn(getMetaTableRowResult()); // Now start up the catalogtracker with our doctored Connection. final CatalogTracker ct = constructAndStartCatalogTracker(connection); try { - RootLocationEditor.setRootLocation(this.watcher, sn); + // Set a location for root and meta. + RootLocationEditor.setRootLocation(this.watcher, SN); + ct.setMetaLocation(SN); + // Call the method that HBASE-4288 calls. + Assert.assertFalse(ct.waitForMetaServerConnectionDefault() == null); + } finally { + // Clean out root and meta locations or later tests will be confused... + // they presume start fresh in zk. + ct.resetMetaLocation(); + RootLocationEditor.deleteRootLocation(this.watcher); + } + } finally { + // Clear out our doctored connection or could mess up subsequent tests. + HConnectionManager.deleteConnection(UTIL.getConfiguration(), true); + } + } + + /** + * Test we survive a connection refused {@link ConnectException} + * @throws IOException + * @throws InterruptedException + * @throws KeeperException + */ + @Test + public void testGetMetaServerConnectionFails() + throws IOException, InterruptedException, KeeperException { + // Mock an HRegionInterface. + final HRegionInterface implementation = Mockito.mock(HRegionInterface.class); + HConnection connection = mockConnection(implementation); + try { + // If a 'get' is called on mocked interface, throw connection refused. + Mockito.when(implementation.get((byte[]) Mockito.any(), (Get) Mockito.any())). + thenThrow(new ConnectException("Connection refused")); + // Now start up the catalogtracker with our doctored Connection. + final CatalogTracker ct = constructAndStartCatalogTracker(connection); + try { + RootLocationEditor.setRootLocation(this.watcher, SN); long timeout = UTIL.getConfiguration(). getLong("hbase.catalog.verification.timeout", 1000); Assert.assertFalse(ct.verifyMetaRegionLocation(timeout)); @@ -234,7 +275,8 @@ public class TestCatalogTracker { @Test (expected = NotAllMetaRegionsOnlineException.class) public void testTimeoutWaitForRoot() throws IOException, InterruptedException { - final CatalogTracker ct = constructAndStartCatalogTracker(); + HConnection connection = Mockito.mock(HConnection.class); + final CatalogTracker ct = constructAndStartCatalogTracker(connection); ct.waitForRoot(100); } @@ -259,7 +301,8 @@ public class TestCatalogTracker { */ @Test public void testNoTimeoutWaitForRoot() throws IOException, InterruptedException, KeeperException { - final CatalogTracker ct = constructAndStartCatalogTracker(); + HConnection connection = Mockito.mock(HConnection.class); + final CatalogTracker ct = constructAndStartCatalogTracker(connection); ServerName hsa = ct.getRootLocation(); Assert.assertNull(hsa); @@ -328,17 +371,7 @@ public class TestCatalogTracker { // like things have been getting assigned. Make it so we'll return a // location (no matter what the Get is). Same for getHRegionInfo -- always // just return the meta region. - List kvs = new ArrayList(); - kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY, - HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, - Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO))); - kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY, - HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, - Bytes.toBytes(SN.getHostAndPort()))); - kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY, - HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, - Bytes.toBytes(SN.getStartcode()))); - final Result result = new Result(kvs); + final Result result = getMetaTableRowResult(); Mockito.when(connection.getRegionServerWithRetries((ServerCallable)Mockito.any())). thenReturn(result); Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any())). @@ -361,6 +394,57 @@ public class TestCatalogTracker { } } + /** + * @param implementation An {@link HRegionInterface} instance; you'll likely + * want to pass a mocked HRS; can be null. + * @return Mock up a connection that returns a {@link Configuration} when + * {@link HConnection#getConfiguration()} is called, a 'location' when + * {@link HConnection#getRegionLocation(byte[], byte[], boolean)} is called, + * and that returns the passed {@link HRegionInterface} instance when + * {@link HConnection#getHRegionConnection(String, int)} + * is called (Be sure call + * {@link HConnectionManager#deleteConnection(org.apache.hadoop.conf.Configuration, boolean)} + * when done with this mocked Connection. + * @throws IOException + */ + private HConnection mockConnection(final HRegionInterface implementation) + throws IOException { + HConnection connection = + HConnectionTestingUtility.getMockedConnection(UTIL.getConfiguration()); + // Make it so we return any old location when asked. + final HRegionLocation anyLocation = + new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, SN.getHostname(), + SN.getPort()); + Mockito.when(connection.getRegionLocation((byte[]) Mockito.any(), + (byte[]) Mockito.any(), Mockito.anyBoolean())). + thenReturn(anyLocation); + if (implementation != null) { + // If a call to getHRegionConnection, return this implementation. + Mockito.when(connection.getHRegionConnection(Mockito.anyString(), Mockito.anyInt())). + thenReturn(implementation); + } + return connection; + } + + /** + * @return A mocked up Result that fakes a Get on a row in the + * .META. table. + * @throws IOException + */ + private Result getMetaTableRowResult() throws IOException { + List kvs = new ArrayList(); + kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY, + HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, + Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO))); + kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY, + HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, + Bytes.toBytes(SN.getHostAndPort()))); + kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY, + HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, + Bytes.toBytes(SN.getStartcode()))); + return new Result(kvs); + } + private void startWaitAliveThenWaitItLives(final Thread t, final int ms) { t.start(); while(!t.isAlive()) {