HBASE-8531 TestZooKeeper fails in trunk/0.95 builds
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1482890 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0a183765ac
commit
f19b0360c1
|
@ -285,11 +285,13 @@ public class ClientScanner extends AbstractClientScanner {
|
||||||
values = callable.withRetries();
|
values = callable.withRetries();
|
||||||
retryAfterOutOfOrderException = true;
|
retryAfterOutOfOrderException = true;
|
||||||
} catch (DoNotRetryIOException e) {
|
} catch (DoNotRetryIOException e) {
|
||||||
|
// DNRIOEs are thrown to make us break out of retries. Some types of DNRIOEs want us
|
||||||
|
// to reset the scanner and come back in again.
|
||||||
if (e instanceof UnknownScannerException) {
|
if (e instanceof UnknownScannerException) {
|
||||||
long timeout = lastNext + scannerTimeout;
|
long timeout = lastNext + scannerTimeout;
|
||||||
// If we are over the timeout, throw this exception to the client
|
// If we are over the timeout, throw this exception to the client wrapped in
|
||||||
// Else, it's because the region moved and we used the old id
|
// a ScannerTimeoutException. Else, it's because the region moved and we used the old
|
||||||
// against the new region server; reset the scanner.
|
// id against the new region server; reset the scanner.
|
||||||
if (timeout < System.currentTimeMillis()) {
|
if (timeout < System.currentTimeMillis()) {
|
||||||
long elapsed = System.currentTimeMillis() - lastNext;
|
long elapsed = System.currentTimeMillis() - lastNext;
|
||||||
ScannerTimeoutException ex = new ScannerTimeoutException(
|
ScannerTimeoutException ex = new ScannerTimeoutException(
|
||||||
|
@ -299,16 +301,20 @@ public class ClientScanner extends AbstractClientScanner {
|
||||||
throw ex;
|
throw ex;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// If exception is any but the list below throw it back to the client; else setup
|
||||||
|
// the scanner and retry.
|
||||||
Throwable cause = e.getCause();
|
Throwable cause = e.getCause();
|
||||||
if ((cause == null || (!(cause instanceof NotServingRegionException)
|
if ((cause != null && cause instanceof NotServingRegionException) ||
|
||||||
&& !(cause instanceof RegionServerStoppedException))) &&
|
(cause != null && cause instanceof RegionServerStoppedException) ||
|
||||||
!(e instanceof RegionServerStoppedException) &&
|
e instanceof OutOfOrderScannerNextException) {
|
||||||
!(e instanceof OutOfOrderScannerNextException)) {
|
// Pass
|
||||||
|
// It is easier writing the if loop test as list of what is allowed rather than
|
||||||
|
// as a list of what is not allowed... so if in here, it means we do not throw.
|
||||||
|
} else {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Else, its signal from depths of ScannerCallable that we got an
|
// Else, its signal from depths of ScannerCallable that we need to reset the scanner.
|
||||||
// NSRE on a next and that we need to reset the scanner.
|
|
||||||
if (this.lastResult != null) {
|
if (this.lastResult != null) {
|
||||||
this.scan.setStartRow(this.lastResult.getRow());
|
this.scan.setStartRow(this.lastResult.getRow());
|
||||||
// Skip first row returned. We already let it out on previous
|
// Skip first row returned. We already let it out on previous
|
||||||
|
@ -319,13 +325,17 @@ public class ClientScanner extends AbstractClientScanner {
|
||||||
if (retryAfterOutOfOrderException) {
|
if (retryAfterOutOfOrderException) {
|
||||||
retryAfterOutOfOrderException = false;
|
retryAfterOutOfOrderException = false;
|
||||||
} else {
|
} else {
|
||||||
|
// TODO: Why wrap this in a DNRIOE when it already is a DNRIOE?
|
||||||
throw new DoNotRetryIOException("Failed after retry of " +
|
throw new DoNotRetryIOException("Failed after retry of " +
|
||||||
"OutOfOrderScannerNextException: was there a rpc timeout?", e);
|
"OutOfOrderScannerNextException: was there a rpc timeout?", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Clear region
|
// Clear region.
|
||||||
this.currentRegion = null;
|
this.currentRegion = null;
|
||||||
|
// Set this to zero so we don't try and do an rpc and close on remote server when
|
||||||
|
// the exception we got was UnknownScanner or the Server is going down.
|
||||||
callable = null;
|
callable = null;
|
||||||
|
// This continue will take us to while at end of loop where we will set up new scanner.
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
long currentTime = System.currentTimeMillis();
|
long currentTime = System.currentTimeMillis();
|
||||||
|
|
|
@ -140,8 +140,7 @@ public class ScannerCallable extends ServerCallable<Result[]> {
|
||||||
ScanRequest request = null;
|
ScanRequest request = null;
|
||||||
try {
|
try {
|
||||||
incRPCcallsMetrics();
|
incRPCcallsMetrics();
|
||||||
request =
|
request = RequestConverter.buildScanRequest(scannerId, caching, false, nextCallSeq);
|
||||||
RequestConverter.buildScanRequest(scannerId, caching, false, nextCallSeq);
|
|
||||||
ScanResponse response = null;
|
ScanResponse response = null;
|
||||||
try {
|
try {
|
||||||
response = stub.scan(null, request);
|
response = stub.scan(null, request);
|
||||||
|
@ -194,14 +193,24 @@ public class ScannerCallable extends ServerCallable<Result[]> {
|
||||||
LOG.info("Failed to relocate region", t);
|
LOG.info("Failed to relocate region", t);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// The below convertion of exceptions into DoNotRetryExceptions is a little strange.
|
||||||
|
// Why not just have these exceptions implment DNRIOE you ask? Well, usually we want
|
||||||
|
// ServerCallable#withRetries to just retry when it gets these exceptions. In here in
|
||||||
|
// a scan when doing a next in particular, we want to break out and get the scanner to
|
||||||
|
// reset itself up again. Throwing a DNRIOE is how we signal this to happen (its ugly,
|
||||||
|
// yeah and hard to follow and in need of a refactor).
|
||||||
if (ioe instanceof NotServingRegionException) {
|
if (ioe instanceof NotServingRegionException) {
|
||||||
// Throw a DNRE so that we break out of cycle of calling NSRE
|
// Throw a DNRE so that we break out of cycle of calling NSRE
|
||||||
// when what we need is to open scanner against new location.
|
// when what we need is to open scanner against new location.
|
||||||
// Attach NSRE to signal client that it needs to resetup scanner.
|
// Attach NSRE to signal client that it needs to re-setup scanner.
|
||||||
if (this.scanMetrics != null) {
|
if (this.scanMetrics != null) {
|
||||||
this.scanMetrics.countOfNSRE.incrementAndGet();
|
this.scanMetrics.countOfNSRE.incrementAndGet();
|
||||||
}
|
}
|
||||||
throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
|
throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
|
||||||
|
} else if (ioe instanceof RegionServerStoppedException) {
|
||||||
|
// Throw a DNRE so that we break out of cycle of the retries and instead go and
|
||||||
|
// open scanner against new location.
|
||||||
|
throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
|
||||||
} else {
|
} else {
|
||||||
// The outer layers will retry
|
// The outer layers will retry
|
||||||
throw ioe;
|
throw ioe;
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.exceptions;
|
package org.apache.hadoop.hbase.exceptions;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -25,8 +27,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("serial")
|
@SuppressWarnings("serial")
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public class RegionServerStoppedException extends DoNotRetryIOException {
|
public class RegionServerStoppedException extends IOException {
|
||||||
|
|
||||||
public RegionServerStoppedException(String s) {
|
public RegionServerStoppedException(String s) {
|
||||||
super(s);
|
super(s);
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,7 +52,6 @@ public class TestClientNoCluster {
|
||||||
// Run my HConnection overrides. Use my little HConnectionImplementation below which
|
// Run my HConnection overrides. Use my little HConnectionImplementation below which
|
||||||
// allows me insert mocks and also use my Registry below rather than the default zk based
|
// allows me insert mocks and also use my Registry below rather than the default zk based
|
||||||
// one so tests run faster and don't have zk dependency.
|
// one so tests run faster and don't have zk dependency.
|
||||||
this.conf.set("hbase.client.connection.impl", NoClusterConnection.class.getName());
|
|
||||||
this.conf.set("hbase.client.registry.impl", SimpleRegistry.class.getName());
|
this.conf.set("hbase.client.registry.impl", SimpleRegistry.class.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,11 +89,35 @@ public class TestClientNoCluster {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDoNotRetryMetaScanner() throws IOException {
|
public void testDoNotRetryMetaScanner() throws IOException {
|
||||||
|
this.conf.set("hbase.client.connection.impl",
|
||||||
|
RegionServerStoppedOnScannerOpenConnection.class.getName());
|
||||||
MetaScanner.metaScan(this.conf, null);
|
MetaScanner.metaScan(this.conf, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDoNotRetryOnScan() throws IOException {
|
public void testDoNotRetryOnScanNext() throws IOException {
|
||||||
|
this.conf.set("hbase.client.connection.impl",
|
||||||
|
RegionServerStoppedOnScannerOpenConnection.class.getName());
|
||||||
|
// Go against meta else we will try to find first region for the table on construction which
|
||||||
|
// means we'll have to do a bunch more mocking. Tests that go against meta only should be
|
||||||
|
// good for a bit of testing.
|
||||||
|
HTable table = new HTable(this.conf, HConstants.META_TABLE_NAME);
|
||||||
|
ResultScanner scanner = table.getScanner(HConstants.CATALOG_FAMILY);
|
||||||
|
try {
|
||||||
|
Result result = null;
|
||||||
|
while ((result = scanner.next()) != null) {
|
||||||
|
LOG.info(result);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
scanner.close();
|
||||||
|
table.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRegionServerStoppedOnScannerOpen() throws IOException {
|
||||||
|
this.conf.set("hbase.client.connection.impl",
|
||||||
|
RegionServerStoppedOnScannerOpenConnection.class.getName());
|
||||||
// Go against meta else we will try to find first region for the table on construction which
|
// Go against meta else we will try to find first region for the table on construction which
|
||||||
// means we'll have to do a bunch more mocking. Tests that go against meta only should be
|
// means we'll have to do a bunch more mocking. Tests that go against meta only should be
|
||||||
// good for a bit of testing.
|
// good for a bit of testing.
|
||||||
|
@ -114,10 +137,44 @@ public class TestClientNoCluster {
|
||||||
/**
|
/**
|
||||||
* Override to shutdown going to zookeeper for cluster id and meta location.
|
* Override to shutdown going to zookeeper for cluster id and meta location.
|
||||||
*/
|
*/
|
||||||
static class NoClusterConnection extends HConnectionManager.HConnectionImplementation {
|
static class ScanOpenNextThenExceptionThenRecoverConnection
|
||||||
|
extends HConnectionManager.HConnectionImplementation {
|
||||||
final ClientService.BlockingInterface stub;
|
final ClientService.BlockingInterface stub;
|
||||||
|
|
||||||
NoClusterConnection(Configuration conf, boolean managed) throws IOException {
|
ScanOpenNextThenExceptionThenRecoverConnection(Configuration conf,
|
||||||
|
boolean managed) throws IOException {
|
||||||
|
super(conf, managed);
|
||||||
|
// Mock up my stub so open scanner returns a scanner id and then on next, we throw
|
||||||
|
// exceptions for three times and then after that, we return no more to scan.
|
||||||
|
this.stub = Mockito.mock(ClientService.BlockingInterface.class);
|
||||||
|
long sid = 12345L;
|
||||||
|
try {
|
||||||
|
Mockito.when(stub.scan((RpcController)Mockito.any(),
|
||||||
|
(ClientProtos.ScanRequest)Mockito.any())).
|
||||||
|
thenReturn(ClientProtos.ScanResponse.newBuilder().setScannerId(sid).build()).
|
||||||
|
thenThrow(new ServiceException(new RegionServerStoppedException("From Mockito"))).
|
||||||
|
thenReturn(ClientProtos.ScanResponse.newBuilder().setScannerId(sid).
|
||||||
|
setMoreResults(false).build());
|
||||||
|
} catch (ServiceException e) {
|
||||||
|
throw new IOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BlockingInterface getClient(ServerName sn) throws IOException {
|
||||||
|
return this.stub;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Override to shutdown going to zookeeper for cluster id and meta location.
|
||||||
|
*/
|
||||||
|
static class RegionServerStoppedOnScannerOpenConnection
|
||||||
|
extends HConnectionManager.HConnectionImplementation {
|
||||||
|
final ClientService.BlockingInterface stub;
|
||||||
|
|
||||||
|
RegionServerStoppedOnScannerOpenConnection(Configuration conf, boolean managed)
|
||||||
|
throws IOException {
|
||||||
super(conf, managed);
|
super(conf, managed);
|
||||||
// Mock up my stub so open scanner returns a scanner id and then on next, we throw
|
// Mock up my stub so open scanner returns a scanner id and then on next, we throw
|
||||||
// exceptions for three times and then after that, we return no more to scan.
|
// exceptions for three times and then after that, we return no more to scan.
|
||||||
|
|
Loading…
Reference in New Issue