HBASE-8531 TestZooKeeper fails in trunk/0.95 builds

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1482890 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2013-05-15 15:09:37 +00:00
parent 0a183765ac
commit f19b0360c1
4 changed files with 96 additions and 19 deletions

View File

@ -285,11 +285,13 @@ public class ClientScanner extends AbstractClientScanner {
values = callable.withRetries(); values = callable.withRetries();
retryAfterOutOfOrderException = true; retryAfterOutOfOrderException = true;
} catch (DoNotRetryIOException e) { } catch (DoNotRetryIOException e) {
// DNRIOEs are thrown to make us break out of retries. Some types of DNRIOEs want us
// to reset the scanner and come back in again.
if (e instanceof UnknownScannerException) { if (e instanceof UnknownScannerException) {
long timeout = lastNext + scannerTimeout; long timeout = lastNext + scannerTimeout;
// If we are over the timeout, throw this exception to the client // If we are over the timeout, throw this exception to the client wrapped in
// Else, it's because the region moved and we used the old id // a ScannerTimeoutException. Else, it's because the region moved and we used the old
// against the new region server; reset the scanner. // id against the new region server; reset the scanner.
if (timeout < System.currentTimeMillis()) { if (timeout < System.currentTimeMillis()) {
long elapsed = System.currentTimeMillis() - lastNext; long elapsed = System.currentTimeMillis() - lastNext;
ScannerTimeoutException ex = new ScannerTimeoutException( ScannerTimeoutException ex = new ScannerTimeoutException(
@ -299,16 +301,20 @@ public class ClientScanner extends AbstractClientScanner {
throw ex; throw ex;
} }
} else { } else {
// If exception is any but the list below throw it back to the client; else setup
// the scanner and retry.
Throwable cause = e.getCause(); Throwable cause = e.getCause();
if ((cause == null || (!(cause instanceof NotServingRegionException) if ((cause != null && cause instanceof NotServingRegionException) ||
&& !(cause instanceof RegionServerStoppedException))) && (cause != null && cause instanceof RegionServerStoppedException) ||
!(e instanceof RegionServerStoppedException) && e instanceof OutOfOrderScannerNextException) {
!(e instanceof OutOfOrderScannerNextException)) { // Pass
// It is easier writing the if loop test as list of what is allowed rather than
// as a list of what is not allowed... so if in here, it means we do not throw.
} else {
throw e; throw e;
} }
} }
// Else, its signal from depths of ScannerCallable that we got an // Else, its signal from depths of ScannerCallable that we need to reset the scanner.
// NSRE on a next and that we need to reset the scanner.
if (this.lastResult != null) { if (this.lastResult != null) {
this.scan.setStartRow(this.lastResult.getRow()); this.scan.setStartRow(this.lastResult.getRow());
// Skip first row returned. We already let it out on previous // Skip first row returned. We already let it out on previous
@ -319,13 +325,17 @@ public class ClientScanner extends AbstractClientScanner {
if (retryAfterOutOfOrderException) { if (retryAfterOutOfOrderException) {
retryAfterOutOfOrderException = false; retryAfterOutOfOrderException = false;
} else { } else {
// TODO: Why wrap this in a DNRIOE when it already is a DNRIOE?
throw new DoNotRetryIOException("Failed after retry of " + throw new DoNotRetryIOException("Failed after retry of " +
"OutOfOrderScannerNextException: was there a rpc timeout?", e); "OutOfOrderScannerNextException: was there a rpc timeout?", e);
} }
} }
// Clear region // Clear region.
this.currentRegion = null; this.currentRegion = null;
// Set this to zero so we don't try and do an rpc and close on remote server when
// the exception we got was UnknownScanner or the Server is going down.
callable = null; callable = null;
// This continue will take us to while at end of loop where we will set up new scanner.
continue; continue;
} }
long currentTime = System.currentTimeMillis(); long currentTime = System.currentTimeMillis();

View File

@ -140,8 +140,7 @@ public class ScannerCallable extends ServerCallable<Result[]> {
ScanRequest request = null; ScanRequest request = null;
try { try {
incRPCcallsMetrics(); incRPCcallsMetrics();
request = request = RequestConverter.buildScanRequest(scannerId, caching, false, nextCallSeq);
RequestConverter.buildScanRequest(scannerId, caching, false, nextCallSeq);
ScanResponse response = null; ScanResponse response = null;
try { try {
response = stub.scan(null, request); response = stub.scan(null, request);
@ -194,14 +193,24 @@ public class ScannerCallable extends ServerCallable<Result[]> {
LOG.info("Failed to relocate region", t); LOG.info("Failed to relocate region", t);
} }
} }
// The below convertion of exceptions into DoNotRetryExceptions is a little strange.
// Why not just have these exceptions implment DNRIOE you ask? Well, usually we want
// ServerCallable#withRetries to just retry when it gets these exceptions. In here in
// a scan when doing a next in particular, we want to break out and get the scanner to
// reset itself up again. Throwing a DNRIOE is how we signal this to happen (its ugly,
// yeah and hard to follow and in need of a refactor).
if (ioe instanceof NotServingRegionException) { if (ioe instanceof NotServingRegionException) {
// Throw a DNRE so that we break out of cycle of calling NSRE // Throw a DNRE so that we break out of cycle of calling NSRE
// when what we need is to open scanner against new location. // when what we need is to open scanner against new location.
// Attach NSRE to signal client that it needs to resetup scanner. // Attach NSRE to signal client that it needs to re-setup scanner.
if (this.scanMetrics != null) { if (this.scanMetrics != null) {
this.scanMetrics.countOfNSRE.incrementAndGet(); this.scanMetrics.countOfNSRE.incrementAndGet();
} }
throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe); throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
} else if (ioe instanceof RegionServerStoppedException) {
// Throw a DNRE so that we break out of cycle of the retries and instead go and
// open scanner against new location.
throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
} else { } else {
// The outer layers will retry // The outer layers will retry
throw ioe; throw ioe;

View File

@ -18,6 +18,8 @@
*/ */
package org.apache.hadoop.hbase.exceptions; package org.apache.hadoop.hbase.exceptions;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
/** /**
@ -25,8 +27,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
*/ */
@SuppressWarnings("serial") @SuppressWarnings("serial")
@InterfaceAudience.Private @InterfaceAudience.Private
public class RegionServerStoppedException extends DoNotRetryIOException { public class RegionServerStoppedException extends IOException {
public RegionServerStoppedException(String s) { public RegionServerStoppedException(String s) {
super(s); super(s);
} }

View File

@ -52,7 +52,6 @@ public class TestClientNoCluster {
// Run my HConnection overrides. Use my little HConnectionImplementation below which // Run my HConnection overrides. Use my little HConnectionImplementation below which
// allows me insert mocks and also use my Registry below rather than the default zk based // allows me insert mocks and also use my Registry below rather than the default zk based
// one so tests run faster and don't have zk dependency. // one so tests run faster and don't have zk dependency.
this.conf.set("hbase.client.connection.impl", NoClusterConnection.class.getName());
this.conf.set("hbase.client.registry.impl", SimpleRegistry.class.getName()); this.conf.set("hbase.client.registry.impl", SimpleRegistry.class.getName());
} }
@ -90,11 +89,35 @@ public class TestClientNoCluster {
@Test @Test
public void testDoNotRetryMetaScanner() throws IOException { public void testDoNotRetryMetaScanner() throws IOException {
this.conf.set("hbase.client.connection.impl",
RegionServerStoppedOnScannerOpenConnection.class.getName());
MetaScanner.metaScan(this.conf, null); MetaScanner.metaScan(this.conf, null);
} }
@Test @Test
public void testDoNotRetryOnScan() throws IOException { public void testDoNotRetryOnScanNext() throws IOException {
this.conf.set("hbase.client.connection.impl",
RegionServerStoppedOnScannerOpenConnection.class.getName());
// Go against meta else we will try to find first region for the table on construction which
// means we'll have to do a bunch more mocking. Tests that go against meta only should be
// good for a bit of testing.
HTable table = new HTable(this.conf, HConstants.META_TABLE_NAME);
ResultScanner scanner = table.getScanner(HConstants.CATALOG_FAMILY);
try {
Result result = null;
while ((result = scanner.next()) != null) {
LOG.info(result);
}
} finally {
scanner.close();
table.close();
}
}
@Test
public void testRegionServerStoppedOnScannerOpen() throws IOException {
this.conf.set("hbase.client.connection.impl",
RegionServerStoppedOnScannerOpenConnection.class.getName());
// Go against meta else we will try to find first region for the table on construction which // Go against meta else we will try to find first region for the table on construction which
// means we'll have to do a bunch more mocking. Tests that go against meta only should be // means we'll have to do a bunch more mocking. Tests that go against meta only should be
// good for a bit of testing. // good for a bit of testing.
@ -114,10 +137,44 @@ public class TestClientNoCluster {
/** /**
* Override to shutdown going to zookeeper for cluster id and meta location. * Override to shutdown going to zookeeper for cluster id and meta location.
*/ */
static class NoClusterConnection extends HConnectionManager.HConnectionImplementation { static class ScanOpenNextThenExceptionThenRecoverConnection
extends HConnectionManager.HConnectionImplementation {
final ClientService.BlockingInterface stub; final ClientService.BlockingInterface stub;
NoClusterConnection(Configuration conf, boolean managed) throws IOException { ScanOpenNextThenExceptionThenRecoverConnection(Configuration conf,
boolean managed) throws IOException {
super(conf, managed);
// Mock up my stub so open scanner returns a scanner id and then on next, we throw
// exceptions for three times and then after that, we return no more to scan.
this.stub = Mockito.mock(ClientService.BlockingInterface.class);
long sid = 12345L;
try {
Mockito.when(stub.scan((RpcController)Mockito.any(),
(ClientProtos.ScanRequest)Mockito.any())).
thenReturn(ClientProtos.ScanResponse.newBuilder().setScannerId(sid).build()).
thenThrow(new ServiceException(new RegionServerStoppedException("From Mockito"))).
thenReturn(ClientProtos.ScanResponse.newBuilder().setScannerId(sid).
setMoreResults(false).build());
} catch (ServiceException e) {
throw new IOException(e);
}
}
@Override
public BlockingInterface getClient(ServerName sn) throws IOException {
return this.stub;
}
}
/**
* Override to shutdown going to zookeeper for cluster id and meta location.
*/
static class RegionServerStoppedOnScannerOpenConnection
extends HConnectionManager.HConnectionImplementation {
final ClientService.BlockingInterface stub;
RegionServerStoppedOnScannerOpenConnection(Configuration conf, boolean managed)
throws IOException {
super(conf, managed); super(conf, managed);
// Mock up my stub so open scanner returns a scanner id and then on next, we throw // Mock up my stub so open scanner returns a scanner id and then on next, we throw
// exceptions for three times and then after that, we return no more to scan. // exceptions for three times and then after that, we return no more to scan.