HBASE-3686 ClientScanner skips too many rows on recovery if using scanner caching

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1085542 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-03-25 19:54:48 +00:00
parent 0adc6ef09e
commit 83143be819
3 changed files with 66 additions and 5 deletions

View File

@ -185,6 +185,8 @@ Release 0.90.2 - Unreleased
HBASE-3666 TestScannerTimeout fails occasionally
HBASE-3497 TableMapReduceUtil.initTableReducerJob broken due to setConf
method in TableOutputFormat
HBASE-3686 ClientScanner skips too many rows on recovery if using scanner
caching (Sean Sechrist via Stack)
IMPROVEMENTS
HBASE-3542 MultiGet methods in Thrift

View File

@ -1101,15 +1101,18 @@ public class HTable implements HTableInterface {
boolean skipFirst = false;
do {
try {
if (skipFirst) {
// Skip only the first row (which was the last row of the last
// already-processed batch).
callable.setCaching(1);
values = getConnection().getRegionServerWithRetries(callable);
callable.setCaching(this.caching);
skipFirst = false;
}
// Server returns null values if scanning is to stop. Else,
// returns an empty array if scanning is to go on and we've just
// exhausted current region.
values = getConnection().getRegionServerWithRetries(callable);
if (skipFirst) {
skipFirst = false;
// Reget.
values = getConnection().getRegionServerWithRetries(callable);
}
} catch (DoNotRetryIOException e) {
if (e instanceof UnknownScannerException) {
long timeout = lastNext + scannerTimeout;

View File

@ -49,6 +49,7 @@ public class TestScannerTimeout {
// Be careful w/ what you set this timer to... it can get in the way of
// the mini cluster coming up -- the verification in particular.
private final static int SCANNER_TIMEOUT = 10000;
// Scanner caching value that the HBASE-3686 tests pass to Scan.setCaching().
private final static int SCANNER_CACHING = 5;
/**
* @throws java.lang.Exception
@ -134,4 +135,59 @@ public class TestScannerTimeout {
assertEquals(NB_ROWS, results.length);
r.close();
}
/**
 * Test that the scanner won't miss any rows if the region server it was
 * reading from is aborted in the middle of the scan. Before HBASE-3686, the
 * scan would skip rows.
 * @throws Exception
 */
@Test
public void test3686a() throws Exception {
  HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);
  Scan scan = new Scan();
  scan.setCaching(SCANNER_CACHING);
  HTable table = new HTable(TABLE_NAME);
  try {
    ResultScanner r = table.getScanner(scan);
    try {
      // The first row is consumed by the next() call just below, so the
      // count starts at one.
      int count = 1;
      r.next();
      // Kill after one call to next(), which was issued with a caching of
      // SCANNER_CACHING rows.
      rs.abort("die!");
      while (r.next() != null) {
        count++;
      }
      assertEquals(NB_ROWS, count);
    } finally {
      // Release the scanner even when the assertion or the scan fails.
      r.close();
    }
  } finally {
    // The table was previously never closed; release it in all cases.
    table.close();
  }
}
/**
 * Make sure that no rows are lost if the scanner timeout is longer on the
 * client than the server, and the scan times out on the server but not the
 * client.
 * @throws Exception
 */
@Test
public void test3686b() throws Exception {
  Scan scan = new Scan();
  scan.setCaching(SCANNER_CACHING);
  // Set a very high lease period on a copy of the configuration. Only the
  // HTable created below uses this copy, so the setting is client-side only
  // (the RS is already created with its own configuration).
  Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
  conf.setInt(
      HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY, SCANNER_TIMEOUT * 100);
  HTable higherScanTimeoutTable = new HTable(conf, TABLE_NAME);
  try {
    ResultScanner r = higherScanTimeoutTable.getScanner(scan);
    try {
      // The first row is consumed by the next() call just below, so the
      // count starts at one.
      int count = 1;
      r.next();
      // Sleep, allowing the scan to timeout on the server but not on the
      // client.
      // NOTE(review): assumes the server-side lease period is
      // SCANNER_TIMEOUT -- confirm against the cluster setup.
      Thread.sleep(SCANNER_TIMEOUT + 2000);
      while (r.next() != null) {
        count++;
      }
      assertEquals(NB_ROWS, count);
    } finally {
      // Release the scanner even when the assertion or the scan fails.
      r.close();
    }
  } finally {
    // The table was previously never closed; release it in all cases.
    higherScanTimeoutTable.close();
  }
}
}