HBASE-4433 avoid extra next (potentially a seek) if done with column/row (kannan via jgray)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1176202 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Gray 2011-09-27 05:29:57 +00:00
parent f261595ae0
commit 9bfdd1d91c
7 changed files with 90 additions and 39 deletions

View File

@ -4,6 +4,8 @@ Release 0.93.0 - Unreleased
HBASE-4132 Extend the WALActionsListener API to accommodate log archival HBASE-4132 Extend the WALActionsListener API to accommodate log archival
(dhruba borthakur) (dhruba borthakur)
HBASE-4461 Expose getRowOrBefore via Thrift (jgray) HBASE-4461 Expose getRowOrBefore via Thrift (jgray)
HBASE-4433 avoid extra next (potentially a seek) if done with column/row
(kannan via jgray)
BUGS BUGS

View File

@ -50,6 +50,12 @@ public class ExplicitColumnTracker implements ColumnTracker {
private final int maxVersions; private final int maxVersions;
private final int minVersions; private final int minVersions;
/**
* Contains the list of columns that the ExplicitColumnTracker is tracking.
* Each ColumnCount instance also tracks how many versions of the requested
* column have been returned.
*/
private final List<ColumnCount> columns; private final List<ColumnCount> columns;
private final List<ColumnCount> columnsToReuse; private final List<ColumnCount> columnsToReuse;
private int index; private int index;
@ -127,13 +133,22 @@ public class ExplicitColumnTracker implements ColumnTracker {
int count = this.column.increment(); int count = this.column.increment();
if(count >= maxVersions || (count >= minVersions && isExpired(timestamp))) { if(count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
// Done with versions for this column // Done with versions for this column
// Note: because we are done with this column, and are removing
// it from columns, we don't do a ++this.index. The index stays
// the same but the columns have shifted within the array such
// that index now points to the next column we are interested in.
this.columns.remove(this.index); this.columns.remove(this.index);
resetTS(); resetTS();
if(this.columns.size() == this.index) { if (this.columns.size() == this.index) {
// Will not hit any more columns in this storefile // We have served all the requested columns.
this.column = null; this.column = null;
return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
} else { } else {
// We are done with current column; advance to next column
// of interest.
this.column = this.columns.get(this.index); this.column = this.columns.get(this.index);
return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
} }
} else { } else {
setTS(timestamp); setTS(timestamp);
@ -144,15 +159,18 @@ public class ExplicitColumnTracker implements ColumnTracker {
resetTS(); resetTS();
if (ret > 0) { if (ret > 0) {
// Specified column is smaller than the current, skip to next column. // The current KV is smaller than the column the ExplicitColumnTracker
// is interested in, so seek to that column of interest.
return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
} }
// Specified column is bigger than current column // The current KV is bigger than the column the ExplicitColumnTracker
// Move down current column and check again // is interested in. That means there is no more data for the column
if(ret <= -1) { // of interest. Advance the ExplicitColumnTracker state to next
if(++this.index >= this.columns.size()) { // column of interest, and check again.
// No more to match, do not include, done with storefile if (ret <= -1) {
if (++this.index >= this.columns.size()) {
// No more to match, do not include, done with this row.
return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
} }
// This is the recursive case. // This is the recursive case.

View File

@ -352,5 +352,15 @@ public class ScanQueryMatcher {
* Seek to next key which is given as hint. * Seek to next key which is given as hint.
*/ */
SEEK_NEXT_USING_HINT, SEEK_NEXT_USING_HINT,
/**
* Include the KeyValue; done with this column, so seek to the next column.
*/
INCLUDE_AND_SEEK_NEXT_COL,
/**
* Include the KeyValue; done with this row, so seek to the next row.
*/
INCLUDE_AND_SEEK_NEXT_ROW,
} }
} }

View File

@ -258,8 +258,23 @@ class StoreScanner implements KeyValueScanner, InternalScanner, ChangedReadersOb
//DebugPrint.println("SS peek kv = " + kv + " with qcode = " + qcode); //DebugPrint.println("SS peek kv = " + kv + " with qcode = " + qcode);
switch(qcode) { switch(qcode) {
case INCLUDE: case INCLUDE:
case INCLUDE_AND_SEEK_NEXT_ROW:
case INCLUDE_AND_SEEK_NEXT_COL:
results.add(copyKv); results.add(copyKv);
if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW) {
if (!matcher.moreRowsMayExistAfter(kv)) {
outResult.addAll(results);
return false;
}
reseek(matcher.getKeyForNextRow(kv));
} else if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL) {
reseek(matcher.getKeyForNextColumn(kv));
} else {
this.heap.next(); this.heap.next();
}
if (limit > 0 && (results.size() == limit)) { if (limit > 0 && (results.size() == limit)) {
break LOOP; break LOOP;
} }

View File

@ -201,8 +201,8 @@ public class TestBlocksRead extends HBaseTestCase {
putData(FAMILY, "row", "col7", 7); putData(FAMILY, "row", "col7", 7);
region.flushcache(); region.flushcache();
// Expected block reads: 2 // Expected block reads: 1
kvs = getData(FAMILY, "row", "col1", 2); kvs = getData(FAMILY, "row", "col1", 1);
assertEquals(1, kvs.length); assertEquals(1, kvs.length);
verifyData(kvs[0], "row", "col1", 1); verifyData(kvs[0], "row", "col1", 1);
@ -218,8 +218,8 @@ public class TestBlocksRead extends HBaseTestCase {
verifyData(kvs[0], "row", "col2", 2); verifyData(kvs[0], "row", "col2", 2);
verifyData(kvs[1], "row", "col3", 3); verifyData(kvs[1], "row", "col3", 3);
// Expected block reads: 4 // Expected block reads: 3
kvs = getData(FAMILY, "row", Arrays.asList("col5"), 4); kvs = getData(FAMILY, "row", Arrays.asList("col5"), 3);
assertEquals(1, kvs.length); assertEquals(1, kvs.length);
verifyData(kvs[0], "row", "col5", 5); verifyData(kvs[0], "row", "col5", 5);
} }
@ -248,15 +248,16 @@ public class TestBlocksRead extends HBaseTestCase {
putData(FAMILY, "row", "col2", 4); putData(FAMILY, "row", "col2", 4);
region.flushcache(); region.flushcache();
// Baseline expected blocks read: 3 // Baseline expected blocks read: 2
kvs = getData(FAMILY, "row", Arrays.asList("col1"), 3); kvs = getData(FAMILY, "row", Arrays.asList("col1"), 2);
assertEquals(1, kvs.length); assertEquals(1, kvs.length);
verifyData(kvs[0], "row", "col1", 3); verifyData(kvs[0], "row", "col1", 3);
// Baseline expected blocks read: 5 // Baseline expected blocks read: 6
// This increase is a minor glitch due to: HBASE-4466. Once that // This increase is a minor glitch due to: HBASE-4466. Once that
// is fixed this will drop back. The extra access will be a cache hit. // is fixed this will drop back. The extra access will be a cache
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 5); // hit.
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 6);
assertEquals(2, kvs.length); assertEquals(2, kvs.length);
verifyData(kvs[0], "row", "col1", 3); verifyData(kvs[0], "row", "col1", 3);
verifyData(kvs[1], "row", "col2", 4); verifyData(kvs[1], "row", "col2", 4);
@ -271,8 +272,8 @@ public class TestBlocksRead extends HBaseTestCase {
verifyData(kvs[0], "row", "col3", 5); verifyData(kvs[0], "row", "col3", 5);
// Get a column from older file. // Get a column from older file.
// Baseline expected blocks read: 4 // Baseline expected blocks read: 3
kvs = getData(FAMILY, "row", Arrays.asList("col1"), 4); kvs = getData(FAMILY, "row", Arrays.asList("col1"), 3);
assertEquals(1, kvs.length); assertEquals(1, kvs.length);
verifyData(kvs[0], "row", "col1", 3); verifyData(kvs[0], "row", "col1", 3);
@ -290,10 +291,12 @@ public class TestBlocksRead extends HBaseTestCase {
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 6); kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 6);
assertEquals(0, kvs.length); assertEquals(0, kvs.length);
// File 5: Delete with post data timestamp and insert some older // File 5: Delete
// date in new files.
deleteFamily(FAMILY, "row", 10); deleteFamily(FAMILY, "row", 10);
region.flushcache(); region.flushcache();
// File 6: some more puts, but with timestamps older than the
// previous delete.
putData(FAMILY, "row", "col1", 7); putData(FAMILY, "row", "col1", 7);
putData(FAMILY, "row", "col2", 8); putData(FAMILY, "row", "col2", 8);
putData(FAMILY, "row", "col3", 9); putData(FAMILY, "row", "col3", 9);
@ -303,14 +306,17 @@ public class TestBlocksRead extends HBaseTestCase {
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 10); kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 10);
assertEquals(0, kvs.length); assertEquals(0, kvs.length);
// File 6: Put back new data // File 7: Put back new data
putData(FAMILY, "row", "col1", 11); putData(FAMILY, "row", "col1", 11);
putData(FAMILY, "row", "col2", 12); putData(FAMILY, "row", "col2", 12);
putData(FAMILY, "row", "col3", 13); putData(FAMILY, "row", "col3", 13);
region.flushcache(); region.flushcache();
// Baseline expected blocks read: 13 // Baseline expected blocks read: 21
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 13); // This increase is a minor glitch due to: HBASE-4466. Once that
// is fixed this will drop back. The extra access will be a cache
// hit. The test case only has 13 blocks altogether!
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 21);
assertEquals(3, kvs.length); assertEquals(3, kvs.length);
verifyData(kvs[0], "row", "col1", 11); verifyData(kvs[0], "row", "col1", 11);
verifyData(kvs[1], "row", "col2", 12); verifyData(kvs[1], "row", "col2", 12);

View File

@ -77,11 +77,11 @@ public class TestExplicitColumnTracker extends HBaseTestCase {
columns.add(col2); columns.add(col2);
columns.add(col4); columns.add(col4);
List<MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>(); List<MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>();
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); // col1
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL); // col2
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); // col3
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW); // col4
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW); // col5
int maxVersions = 1; int maxVersions = 1;
//Create "Scanner" //Create "Scanner"
@ -111,16 +111,16 @@ public class TestExplicitColumnTracker extends HBaseTestCase {
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE); // col2; 1st version
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL); // col2; 2nd version
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE); // col4; 1st version
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW); // col4; 2nd version
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW);

View File

@ -88,10 +88,10 @@ public class TestQueryMatcher extends HBaseTestCase {
//Expected result //Expected result
List<MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>(); List<MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>();
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL); expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW);
expected.add(ScanQueryMatcher.MatchCode.DONE); expected.add(ScanQueryMatcher.MatchCode.DONE);
// 2,4,5 // 2,4,5
@ -182,9 +182,9 @@ public class TestQueryMatcher extends HBaseTestCase {
long testTTL = 1000; long testTTL = 1000;
MatchCode [] expected = new MatchCode[] { MatchCode [] expected = new MatchCode[] {
ScanQueryMatcher.MatchCode.SEEK_NEXT_COL, ScanQueryMatcher.MatchCode.SEEK_NEXT_COL,
ScanQueryMatcher.MatchCode.INCLUDE, ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL,
ScanQueryMatcher.MatchCode.SEEK_NEXT_COL, ScanQueryMatcher.MatchCode.SEEK_NEXT_COL,
ScanQueryMatcher.MatchCode.INCLUDE, ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL,
ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW, ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW,
ScanQueryMatcher.MatchCode.DONE ScanQueryMatcher.MatchCode.DONE
}; };