HBASE-16266 Do not throw ScannerTimeoutException when catch UnknownScannerException

Signed-off-by: zhangduo <zhangduo@apache.org>
This commit is contained in:
Phil Yang 2016-07-22 11:58:48 +08:00 committed by zhangduo
parent bdd7782f05
commit 6dbce2a8cb
3 changed files with 32 additions and 58 deletions

View File

@ -421,34 +421,20 @@ public abstract class ClientScanner extends AbstractClientScanner {
clearPartialResults();
// DNRIOEs are thrown to make us break out of retries. Some types of DNRIOEs want us
// to reset the scanner and come back in again.
if (e instanceof UnknownScannerException) {
long timeout = lastNext + scannerTimeout;
// If we are over the timeout, throw this exception to the client wrapped in
// a ScannerTimeoutException. Else, it's because the region moved and we used the old
// id against the new region server; reset the scanner.
if (timeout < System.currentTimeMillis()) {
LOG.info("For hints related to the following exception, please try taking a look at: "
+ "https://hbase.apache.org/book.html#trouble.client.scantimeout");
long elapsed = System.currentTimeMillis() - lastNext;
ScannerTimeoutException ex =
new ScannerTimeoutException(elapsed + "ms passed since the last invocation, "
+ "timeout is currently set to " + scannerTimeout);
ex.initCause(e);
throw ex;
}
} else {
// If exception is any but the list below throw it back to the client; else setup
// the scanner and retry.
Throwable cause = e.getCause();
if ((cause != null && cause instanceof NotServingRegionException) ||
(cause != null && cause instanceof RegionServerStoppedException) ||
e instanceof OutOfOrderScannerNextException) {
e instanceof OutOfOrderScannerNextException ||
e instanceof UnknownScannerException ) {
// Pass. It is easier writing the if loop test as list of what is allowed rather than
// as a list of what is not allowed... so if in here, it means we do not throw.
} else {
throw e;
}
}
// Else, it's a signal from the depths of ScannerCallable that we need to reset the scanner.
if (this.lastResult != null) {
// The region has moved. We need to open a brand new scanner at the new location.

View File

@ -100,8 +100,11 @@ public class TestPartialResultsFromClientSide {
// getCellHeapSize().
private static long CELL_HEAP_SIZE = -1;
private static long timeout = 2000;
@BeforeClass
public static void setUpBeforeClass() throws Exception {
TEST_UTIL.getConfiguration().setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, timeout);
TEST_UTIL.startMiniCluster(MINICLUSTER_SIZE);
TEST_UTIL.getAdmin().setBalancerRunning(false, true);
TABLE = createTestTable(TABLE_NAME, ROWS, FAMILIES, QUALIFIERS, VALUE);
@ -1044,5 +1047,22 @@ public class TestPartialResultsFromClientSide {
assertCell(c3, ROWS[1], FAMILIES[0], QUALIFIERS[1]);
}
/**
 * Checks that a scan keeps returning rows after the client has been idle for longer than the
 * configured client scanner timeout, instead of surfacing an exception to the caller.
 *
 * <p>The scan uses a caching of 1 so that every {@code next()} call after the pause has to go
 * back to the server rather than being served from a client-side cache.
 *
 * @throws Exception if table creation, scanning or the sleep fails
 */
@Test
public void testDontThrowUnknowScannerExceptionToClient() throws Exception {
  Scan scan = new Scan();
  scan.setCaching(1);
  // try-with-resources guarantees the scanner and the table are released even when the
  // assertion below fails or a scan call throws.
  try (Table table =
      createTestTable(TableName.valueOf("testDontThrowUnknowScannerException"), ROWS, FAMILIES,
        QUALIFIERS, VALUE);
      ResultScanner scanner = table.getScanner(scan)) {
    // Fetch the first row so that a scanner is open before we go idle.
    scanner.next();
    // Stay idle for three times the scanner timeout configured in setUpBeforeClass.
    Thread.sleep(timeout * 3);
    // The row fetched before the sleep is already counted.
    int count = 1;
    while (scanner.next() != null) {
      count++;
    }
    assertEquals(NUM_ROWS, count);
  }
}
}

View File

@ -92,38 +92,6 @@ public class TestScannerTimeout {
TEST_UTIL.ensureSomeNonStoppedRegionServersAvailable(2);
}
/**
 * Test that we do get a ScannerTimeoutException.
 *
 * <p>The scan uses a caching of 1 and, after the fifth row, sleeps for slightly longer than
 * {@code SCANNER_TIMEOUT + THREAD_WAKE_FREQUENCY} before asking for the next row. The test
 * passes only if a {@link ScannerTimeoutException} is thrown by the scan loop.
 *
 * @throws Exception if the table cannot be opened or the scan fails with anything other than
 *           the expected timeout
 */
@Test(timeout=300000)
public void test2481() throws Exception {
  LOG.info("START ************ test2481");
  Scan scan = new Scan();
  scan.setCaching(1);
  // try-with-resources releases both the scanner and the table on every exit path; the
  // original never closed the scanner.
  try (Table table = TEST_UTIL.getConnection().getTable(TABLE_NAME);
      ResultScanner r = table.getScanner(scan)) {
    int count = 0;
    for (Result res = r.next(); res != null; res = r.next()) {
      count++;
      if (count == 5) {
        // Sleep just a bit more to be sure
        Thread.sleep(SCANNER_TIMEOUT + THREAD_WAKE_FREQUENCY + 100);
      }
    }
  } catch (ScannerTimeoutException e) {
    LOG.info("Got the timeout " + e.getMessage(), e);
    return;
  } finally {
    // Logged here because neither the early return above nor fail() below would otherwise
    // ever reach an END marker.
    LOG.info("END ************ test2481");
  }
  fail("We should be timing out");
}
/**
* Test that scanner can continue even if the region server it was reading
* from failed. Before 2772, it reused the same scanner id.