From 6dbce2a8cb3cf5a0a07b6c3f9825a99af03a6962 Mon Sep 17 00:00:00 2001 From: Phil Yang Date: Fri, 22 Jul 2016 11:58:48 +0800 Subject: [PATCH] HBASE-16266 Do not throw ScannerTimeoutException when catch UnknownScannerException Signed-off-by: zhangduo --- .../hadoop/hbase/client/ClientScanner.java | 38 ++++++------------- .../TestPartialResultsFromClientSide.java | 20 ++++++++++ .../hbase/client/TestScannerTimeout.java | 32 ---------------- 3 files changed, 32 insertions(+), 58 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java index 3b6b83a3f7b..cb4c71454b8 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java @@ -421,34 +421,20 @@ public abstract class ClientScanner extends AbstractClientScanner { clearPartialResults(); // DNRIOEs are thrown to make us break out of retries. Some types of DNRIOEs want us // to reset the scanner and come back in again. - if (e instanceof UnknownScannerException) { - long timeout = lastNext + scannerTimeout; - // If we are over the timeout, throw this exception to the client wrapped in - // a ScannerTimeoutException. Else, it's because the region moved and we used the old - // id against the new region server; reset the scanner. - if (timeout < System.currentTimeMillis()) { - LOG.info("For hints related to the following exception, please try taking a look at: " - + "https://hbase.apache.org/book.html#trouble.client.scantimeout"); - long elapsed = System.currentTimeMillis() - lastNext; - ScannerTimeoutException ex = - new ScannerTimeoutException(elapsed + "ms passed since the last invocation, " - + "timeout is currently set to " + scannerTimeout); - ex.initCause(e); - throw ex; - } + + // If exception is any but the list below throw it back to the client; else setup + // the scanner and retry. + Throwable cause = e.getCause(); + if ((cause != null && cause instanceof NotServingRegionException) || + (cause != null && cause instanceof RegionServerStoppedException) || + e instanceof OutOfOrderScannerNextException || + e instanceof UnknownScannerException ) { + // Pass. It is easier writing the if loop test as list of what is allowed rather than + // as a list of what is not allowed... so if in here, it means we do not throw. } else { - // If exception is any but the list below throw it back to the client; else setup - // the scanner and retry. - Throwable cause = e.getCause(); - if ((cause != null && cause instanceof NotServingRegionException) || - (cause != null && cause instanceof RegionServerStoppedException) || - e instanceof OutOfOrderScannerNextException) { - // Pass. It is easier writing the if loop test as list of what is allowed rather than - // as a list of what is not allowed... so if in here, it means we do not throw. - } else { - throw e; - } + throw e; } + // Else, its signal from depths of ScannerCallable that we need to reset the scanner. if (this.lastResult != null) { // The region has moved. We need to open a brand new scanner at the new location. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java index aea92bf99d8..a0da9c255a3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java @@ -100,8 +100,11 @@ public class TestPartialResultsFromClientSide { // getCellHeapSize(). private static long CELL_HEAP_SIZE = -1; + private static long timeout = 2000; + @BeforeClass public static void setUpBeforeClass() throws Exception { + TEST_UTIL.getConfiguration().setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, timeout); TEST_UTIL.startMiniCluster(MINICLUSTER_SIZE); TEST_UTIL.getAdmin().setBalancerRunning(false, true); TABLE = createTestTable(TABLE_NAME, ROWS, FAMILIES, QUALIFIERS, VALUE); @@ -1044,5 +1047,22 @@ public class TestPartialResultsFromClientSide { assertCell(c3, ROWS[1], FAMILIES[0], QUALIFIERS[1]); } + @Test + public void testDontThrowUnknowScannerExceptionToClient() throws Exception { + Table table = + createTestTable(TableName.valueOf("testDontThrowUnknowScannerException"), ROWS, FAMILIES, + QUALIFIERS, VALUE); + Scan scan = new Scan(); + scan.setCaching(1); + ResultScanner scanner = table.getScanner(scan); + scanner.next(); + Thread.sleep(timeout * 3); + int count = 1; + while (scanner.next() != null) { + count++; + } + assertEquals(NUM_ROWS, count); + scanner.close(); + } } \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestScannerTimeout.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestScannerTimeout.java index 2fb3aff389a..90956ac08c4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestScannerTimeout.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestScannerTimeout.java @@ -92,38 +92,6 @@ public class TestScannerTimeout { TEST_UTIL.ensureSomeNonStoppedRegionServersAvailable(2); } - /** - * Test that we do get a ScannerTimeoutException - * @throws Exception - */ - @Test(timeout=300000) - public void test2481() throws Exception { - LOG.info("START ************ test2481"); - Scan scan = new Scan(); - scan.setCaching(1); - Table table = TEST_UTIL.getConnection().getTable(TABLE_NAME); - ResultScanner r = table.getScanner(scan); - int count = 0; - try { - Result res = r.next(); - while (res != null) { - count++; - if (count == 5) { - // Sleep just a bit more to be sure - Thread.sleep(SCANNER_TIMEOUT + THREAD_WAKE_FREQUENCY + 100); - } - res = r.next(); - } - } catch (ScannerTimeoutException e) { - LOG.info("Got the timeout " + e.getMessage(), e); - return; - } finally { - table.close(); - } - fail("We should be timing out"); - LOG.info("END ************ test2481"); - } - /** * Test that scanner can continue even if the region server it was reading * from failed. Before 2772, it reused the same scanner id.