HBASE-13109 Make better SEEK vs SKIP decisions during scanning.

This commit is contained in:
Lars Hofhansl 2015-03-04 14:03:47 -08:00
parent 883d6fd8e5
commit 464e7ce685
18 changed files with 213 additions and 150 deletions

View File

@ -93,23 +93,6 @@ public class Scan extends Query {
private static final String RAW_ATTR = "_raw_"; private static final String RAW_ATTR = "_raw_";
/**
* EXPERT ONLY.
* An integer (not long) indicating to the scanner logic how many times we attempt to retrieve the
* next KV before we schedule a reseek.
* The right value depends on the size of the average KV. A reseek is more efficient when
* it can skip 5-10 KVs or 512B-1KB, or when the next KV is likely found in another HFile block.
* Setting this only has any effect when columns were added with
* {@link #addColumn(byte[], byte[])}
* <pre>{@code
* Scan s = new Scan(...);
* s.addColumn(...);
* s.setAttribute(Scan.HINT_LOOKAHEAD, Bytes.toBytes(2));
* }</pre>
* Default is 0 (always reseek).
*/
public static final String HINT_LOOKAHEAD = "_look_ahead_";
private byte [] startRow = HConstants.EMPTY_START_ROW; private byte [] startRow = HConstants.EMPTY_START_ROW;
private byte [] stopRow = HConstants.EMPTY_END_ROW; private byte [] stopRow = HConstants.EMPTY_END_ROW;
private int maxVersions = 1; private int maxVersions = 1;

View File

@ -953,7 +953,7 @@ public final class HConstants {
* The byte array represents for NO_NEXT_INDEXED_KEY; * The byte array represents for NO_NEXT_INDEXED_KEY;
* The actual value is irrelevant because this is always compared by reference. * The actual value is irrelevant because this is always compared by reference.
*/ */
public static final byte [] NO_NEXT_INDEXED_KEY = Bytes.toBytes("NO_NEXT_INDEXED_KEY"); public static final Cell NO_NEXT_INDEXED_KEY = new KeyValue();
/** delimiter used between portions of a region name */ /** delimiter used between portions of a region name */
public static final int DELIMITER = ','; public static final int DELIMITER = ',';
public static final String HBASE_CONFIG_READ_ZOOKEEPER_CONFIG = public static final String HBASE_CONFIG_READ_ZOOKEEPER_CONFIG =

View File

@ -268,9 +268,9 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
//// ////
// KeyValue core instance fields. // KeyValue core instance fields.
private byte [] bytes = null; // an immutable byte array that contains the KV protected byte [] bytes = null; // an immutable byte array that contains the KV
private int offset = 0; // offset into bytes buffer KV starts at protected int offset = 0; // offset into bytes buffer KV starts at
private int length = 0; // length of the KV starting from offset. protected int length = 0; // length of the KV starting from offset.
/** /**
* @return True if a delete type, a {@link KeyValue.Type#Delete} or * @return True if a delete type, a {@link KeyValue.Type#Delete} or
@ -1896,6 +1896,58 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
return compareFlatKey(left, 0, left.length, right, 0, right.length); return compareFlatKey(left, 0, left.length, right, 0, right.length);
} }
/**
* Compares the key of a cell against a key supplied as individual components
* (row, family, qualifier, timestamp and type).
* <p>
* Components are compared in that order and the first non-zero result is returned.
* A key with an empty family and qualifier and type {@code Type.Minimum} is treated
* as the "last key in the row" marker on either side and short-circuits the
* family/qualifier/timestamp comparison.
*
* @param cell the cell whose key is the left-hand side of the comparison
* @param row array containing the right-hand row
* @param roff offset of the row in {@code row}
* @param rlen length of the row
* @param fam array containing the right-hand family
* @param foff offset of the family in {@code fam}
* @param flen length of the family
* @param col array containing the right-hand qualifier
* @param coff offset of the qualifier in {@code col}
* @param clen length of the qualifier
* @param ts the right-hand timestamp
* @param type the right-hand key type code
* @return a positive value if {@code cell} appears later in the sorted order than the
* supplied key, a negative value if it appears earlier, 0 if the keys are equal
*/
public int compareKey(Cell cell,
byte[] row, int roff, int rlen,
byte[] fam, int foff, int flen,
byte[] col, int coff, int clen,
long ts, byte type) {
int compare = compareRows(
cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
row, roff, rlen);
if (compare != 0) {
return compare;
}
// If the column is not specified, the "minimum" key type appears the
// latest in the sorted order, regardless of the timestamp. This is used
// for specifying the last key/value in a given row, because there is no
// "lexicographically last column" (it would be infinitely long). The
// "maximum" key type does not need this behavior.
if (cell.getFamilyLength() + cell.getQualifierLength() == 0
&& cell.getTypeByte() == Type.Minimum.getCode()) {
// left is "bigger", i.e. it appears later in the sorted order
return 1;
}
// Mirror case: the supplied key is the "last key in row" marker, so the cell
// sorts before it. Returning here also means fam/col are not read in this case.
if (flen+clen == 0 && type == Type.Minimum.getCode()) {
return -1;
}
compare = compareFamilies(
cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(),
fam, foff, flen);
if (compare != 0) {
return compare;
}
compare = compareColumns(
cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(),
col, coff, clen);
if (compare != 0) {
return compare;
}
// Next compare timestamps.
compare = compareTimestamps(cell.getTimestamp(), ts);
if (compare != 0) {
return compare;
}
// Compare types. Let the delete types sort ahead of puts; i.e. types
// of higher numbers sort before those of lesser numbers. Maximum (255)
// appears ahead of everything, and minimum (0) appears after
// everything.
// The bytes are masked to 0..255 so the subtraction treats them as unsigned.
return (0xff & type) - (0xff & cell.getTypeByte());
}
public int compareOnlyKeyPortion(Cell left, Cell right) { public int compareOnlyKeyPortion(Cell left, Cell right) {
return CellComparator.compare(left, right, true); return CellComparator.compare(left, right, true);
} }
@ -2595,16 +2647,15 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
* Hence create a Keyvalue(aka Cell) that would help in comparing as two cells * Hence create a Keyvalue(aka Cell) that would help in comparing as two cells
*/ */
public static class KeyOnlyKeyValue extends KeyValue { public static class KeyOnlyKeyValue extends KeyValue {
private int length = 0;
private int offset = 0;
private byte[] b;
public KeyOnlyKeyValue() { public KeyOnlyKeyValue() {
} }
public KeyOnlyKeyValue(byte[] b) {
this(b, 0, b.length);
}
public KeyOnlyKeyValue(byte[] b, int offset, int length) { public KeyOnlyKeyValue(byte[] b, int offset, int length) {
this.b = b; this.bytes = b;
this.length = length; this.length = length;
this.offset = offset; this.offset = offset;
} }
@ -2622,7 +2673,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
* @param length * @param length
*/ */
public void setKey(byte[] key, int offset, int length) { public void setKey(byte[] key, int offset, int length) {
this.b = key; this.bytes = key;
this.offset = offset; this.offset = offset;
this.length = length; this.length = length;
} }
@ -2631,13 +2682,13 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
public byte[] getKey() { public byte[] getKey() {
int keylength = getKeyLength(); int keylength = getKeyLength();
byte[] key = new byte[keylength]; byte[] key = new byte[keylength];
System.arraycopy(this.b, getKeyOffset(), key, 0, keylength); System.arraycopy(this.bytes, getKeyOffset(), key, 0, keylength);
return key; return key;
} }
@Override @Override
public byte[] getRowArray() { public byte[] getRowArray() {
return b; return bytes;
} }
@Override @Override
@ -2647,12 +2698,12 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
@Override @Override
public byte[] getFamilyArray() { public byte[] getFamilyArray() {
return b; return bytes;
} }
@Override @Override
public byte getFamilyLength() { public byte getFamilyLength() {
return this.b[getFamilyOffset() - 1]; return this.bytes[getFamilyOffset() - 1];
} }
@Override @Override
@ -2662,7 +2713,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
@Override @Override
public byte[] getQualifierArray() { public byte[] getQualifierArray() {
return b; return bytes;
} }
@Override @Override
@ -2682,12 +2733,12 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
@Override @Override
public short getRowLength() { public short getRowLength() {
return Bytes.toShort(this.b, getKeyOffset()); return Bytes.toShort(this.bytes, getKeyOffset());
} }
@Override @Override
public byte getTypeByte() { public byte getTypeByte() {
return this.b[this.offset + getKeyLength() - 1]; return this.bytes[this.offset + getKeyLength() - 1];
} }
private int getQualifierLength(int rlength, int flength) { private int getQualifierLength(int rlength, int flength) {
@ -2697,7 +2748,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
@Override @Override
public long getTimestamp() { public long getTimestamp() {
int tsOffset = getTimestampOffset(); int tsOffset = getTimestampOffset();
return Bytes.toLong(this.b, tsOffset); return Bytes.toLong(this.bytes, tsOffset);
} }
@Override @Override
@ -2737,10 +2788,10 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
@Override @Override
public String toString() { public String toString() {
if (this.b == null || this.b.length == 0) { if (this.bytes == null || this.bytes.length == 0) {
return "empty"; return "empty";
} }
return keyToString(this.b, this.offset, getKeyLength()) + "/vlen=0/mvcc=0"; return keyToString(this.bytes, this.offset, getKeyLength()) + "/vlen=0/mvcc=0";
} }
@Override @Override

View File

@ -317,6 +317,11 @@ public class HalfStoreFileReader extends StoreFile.Reader {
} }
return ret; return ret;
} }
/**
* This scanner does not report a next indexed key.
* @return always null
*/
@Override
public Cell getNextIndexedKey() {
return null;
}
}; };
} }

View File

@ -17,6 +17,8 @@
*/ */
package org.apache.hadoop.hbase.io.hfile; package org.apache.hadoop.hbase.io.hfile;
import org.apache.hadoop.hbase.Cell;
/** /**
* BlockWithScanInfo is wrapper class for HFileBlock with other attributes. These attributes are * BlockWithScanInfo is wrapper class for HFileBlock with other attributes. These attributes are
* supposed to be much cheaper to be maintained in each caller thread than in HFileBlock itself. * supposed to be much cheaper to be maintained in each caller thread than in HFileBlock itself.
@ -27,9 +29,9 @@ public class BlockWithScanInfo {
* The first key in the next block following this one in the HFile. * The first key in the next block following this one in the HFile.
* If this key is unknown, this is reference-equal with HConstants.NO_NEXT_INDEXED_KEY * If this key is unknown, this is reference-equal with HConstants.NO_NEXT_INDEXED_KEY
*/ */
private final byte[] nextIndexedKey; private final Cell nextIndexedKey;
public BlockWithScanInfo(HFileBlock hFileBlock, byte[] nextIndexedKey) { public BlockWithScanInfo(HFileBlock hFileBlock, Cell nextIndexedKey) {
this.hFileBlock = hFileBlock; this.hFileBlock = hFileBlock;
this.nextIndexedKey = nextIndexedKey; this.nextIndexedKey = nextIndexedKey;
} }
@ -38,7 +40,7 @@ public class BlockWithScanInfo {
return hFileBlock; return hFileBlock;
} }
public byte[] getNextIndexedKey() { public Cell getNextIndexedKey() {
return nextIndexedKey; return nextIndexedKey;
} }
} }

View File

@ -218,14 +218,14 @@ public class HFileBlockIndex {
} }
// the next indexed key // the next indexed key
byte[] nextIndexedKey = null; Cell nextIndexedKey = null;
// Read the next-level (intermediate or leaf) index block. // Read the next-level (intermediate or leaf) index block.
long currentOffset = blockOffsets[rootLevelIndex]; long currentOffset = blockOffsets[rootLevelIndex];
int currentOnDiskSize = blockDataSizes[rootLevelIndex]; int currentOnDiskSize = blockDataSizes[rootLevelIndex];
if (rootLevelIndex < blockKeys.length - 1) { if (rootLevelIndex < blockKeys.length - 1) {
nextIndexedKey = blockKeys[rootLevelIndex + 1]; nextIndexedKey = new KeyValue.KeyOnlyKeyValue(blockKeys[rootLevelIndex + 1]);
} else { } else {
nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY; nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY;
} }
@ -298,7 +298,7 @@ public class HFileBlockIndex {
// Only update next indexed key if there is a next indexed key in the current level // Only update next indexed key if there is a next indexed key in the current level
byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1); byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
if (tmpNextIndexedKey != null) { if (tmpNextIndexedKey != null) {
nextIndexedKey = tmpNextIndexedKey; nextIndexedKey = new KeyValue.KeyOnlyKeyValue(tmpNextIndexedKey);
} }
} }

View File

@ -542,6 +542,10 @@ public class HFileReaderV2 extends AbstractHFileReader {
extends AbstractHFileReader.Scanner { extends AbstractHFileReader.Scanner {
protected HFileBlock block; protected HFileBlock block;
/**
* @return the current value of the {@code nextIndexedKey} field; this may be null
* (not loaded yet) or reference-equal to HConstants.NO_NEXT_INDEXED_KEY
*/
@Override
public Cell getNextIndexedKey() {
return nextIndexedKey;
}
/** /**
* The next indexed key is to keep track of the indexed key of the next data block. * The next indexed key is to keep track of the indexed key of the next data block.
* If the nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the * If the nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the
@ -549,7 +553,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
* *
* If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet. * If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet.
*/ */
protected byte[] nextIndexedKey; protected Cell nextIndexedKey;
public AbstractScannerV2(HFileReaderV2 r, boolean cacheBlocks, public AbstractScannerV2(HFileReaderV2 r, boolean cacheBlocks,
final boolean pread, final boolean isCompaction) { final boolean pread, final boolean isCompaction) {
@ -558,7 +562,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
protected abstract ByteBuffer getFirstKeyInBlock(HFileBlock curBlock); protected abstract ByteBuffer getFirstKeyInBlock(HFileBlock curBlock);
protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey, protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
boolean rewind, Cell key, boolean seekBefore) throws IOException; boolean rewind, Cell key, boolean seekBefore) throws IOException;
@Override @Override
@ -592,9 +596,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
if (this.nextIndexedKey != null && if (this.nextIndexedKey != null &&
(this.nextIndexedKey == HConstants.NO_NEXT_INDEXED_KEY || reader (this.nextIndexedKey == HConstants.NO_NEXT_INDEXED_KEY || reader
.getComparator() .getComparator()
.compareOnlyKeyPortion(key, .compareOnlyKeyPortion(key, nextIndexedKey) < 0)) {
new KeyValue.KeyOnlyKeyValue(nextIndexedKey, 0,
nextIndexedKey.length)) < 0)) {
// The reader shall continue to scan the current data block instead // The reader shall continue to scan the current data block instead
// of querying the // of querying the
// block index as long as it knows the target key is strictly // block index as long as it knows the target key is strictly
@ -672,7 +674,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
// TODO shortcut: seek forward in this block to the last key of the // TODO shortcut: seek forward in this block to the last key of the
// block. // block.
} }
byte[] firstKeyInCurrentBlock = Bytes.getBytes(firstKey); Cell firstKeyInCurrentBlock = new KeyValue.KeyOnlyKeyValue(Bytes.getBytes(firstKey));
loadBlockAndSeekToKey(seekToBlock, firstKeyInCurrentBlock, true, key, true); loadBlockAndSeekToKey(seekToBlock, firstKeyInCurrentBlock, true, key, true);
return true; return true;
} }
@ -877,7 +879,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
} }
@Override @Override
protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey, protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
boolean rewind, Cell key, boolean seekBefore) throws IOException { boolean rewind, Cell key, boolean seekBefore) throws IOException {
if (block == null || block.getOffset() != seekToBlock.getOffset()) { if (block == null || block.getOffset() != seekToBlock.getOffset()) {
updateCurrBlock(seekToBlock); updateCurrBlock(seekToBlock);
@ -1234,7 +1236,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
} }
@Override @Override
protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey, protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
boolean rewind, Cell key, boolean seekBefore) throws IOException { boolean rewind, Cell key, boolean seekBefore) throws IOException {
if (block == null || block.getOffset() != seekToBlock.getOffset()) { if (block == null || block.getOffset() != seekToBlock.getOffset()) {
updateCurrentBlock(seekToBlock); updateCurrentBlock(seekToBlock);

View File

@ -156,4 +156,9 @@ public interface HFileScanner {
* Otherwise returns false. * Otherwise returns false.
*/ */
boolean isSeeked(); boolean isSeeked();
/**
* Returns the next key in the block index.
* @return the next key in the index (the key to seek to the next block);
* implementations may return null when that key is not known
*/
Cell getNextIndexedKey();
} }

View File

@ -56,10 +56,6 @@ public class ExplicitColumnTracker implements ColumnTracker {
private final int maxVersions; private final int maxVersions;
private final int minVersions; private final int minVersions;
// hint for the tracker about how many KVs we will attempt to search via next()
// before we schedule a (re)seek operation
private final int lookAhead;
/** /**
* Contains the list of columns that the ExplicitColumnTracker is tracking. * Contains the list of columns that the ExplicitColumnTracker is tracking.
* Each ColumnCount instance also tracks how many versions of the requested * Each ColumnCount instance also tracks how many versions of the requested
@ -72,7 +68,6 @@ public class ExplicitColumnTracker implements ColumnTracker {
* Used to eliminate duplicates. */ * Used to eliminate duplicates. */
private long latestTSOfCurrentColumn; private long latestTSOfCurrentColumn;
private long oldestStamp; private long oldestStamp;
private int skipCount;
/** /**
* Default constructor. * Default constructor.
@ -85,10 +80,9 @@ public class ExplicitColumnTracker implements ColumnTracker {
* (re)seeking * (re)seeking
*/ */
public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions, public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions,
int maxVersions, long oldestUnexpiredTS, int lookAhead) { int maxVersions, long oldestUnexpiredTS) {
this.maxVersions = maxVersions; this.maxVersions = maxVersions;
this.minVersions = minVersions; this.minVersions = minVersions;
this.lookAhead = lookAhead;
this.oldestStamp = oldestUnexpiredTS; this.oldestStamp = oldestUnexpiredTS;
this.columns = new ColumnCount[columns.size()]; this.columns = new ColumnCount[columns.size()];
int i=0; int i=0;
@ -144,8 +138,7 @@ public class ExplicitColumnTracker implements ColumnTracker {
if (ret > 0) { if (ret > 0) {
// The current KV is smaller than the column the ExplicitColumnTracker // The current KV is smaller than the column the ExplicitColumnTracker
// is interested in, so seek to that column of interest. // is interested in, so seek to that column of interest.
return this.skipCount++ < this.lookAhead ? ScanQueryMatcher.MatchCode.SKIP return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
: ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
} }
// The current KV is bigger than the column the ExplicitColumnTracker // The current KV is bigger than the column the ExplicitColumnTracker
@ -154,7 +147,6 @@ public class ExplicitColumnTracker implements ColumnTracker {
// column of interest, and check again. // column of interest, and check again.
if (ret <= -1) { if (ret <= -1) {
++this.index; ++this.index;
this.skipCount = 0;
if (done()) { if (done()) {
// No more to match, do not include, done with this row. // No more to match, do not include, done with this row.
return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
@ -179,7 +171,6 @@ public class ExplicitColumnTracker implements ColumnTracker {
if (count >= maxVersions || (count >= minVersions && isExpired(timestamp))) { if (count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
// Done with versions for this column // Done with versions for this column
++this.index; ++this.index;
this.skipCount = 0;
resetTS(); resetTS();
if (done()) { if (done()) {
// We have served all the requested columns. // We have served all the requested columns.
@ -198,7 +189,6 @@ public class ExplicitColumnTracker implements ColumnTracker {
// Called between every row. // Called between every row.
public void reset() { public void reset() {
this.index = 0; this.index = 0;
this.skipCount = 0;
this.column = this.columns[this.index]; this.column = this.columns[this.index];
for(ColumnCount col : this.columns) { for(ColumnCount col : this.columns) {
col.setCount(0); col.setCount(0);
@ -238,7 +228,6 @@ public class ExplicitColumnTracker implements ColumnTracker {
resetTS(); resetTS();
if (compare <= 0) { if (compare <= 0) {
++this.index; ++this.index;
this.skipCount = 0;
if (done()) { if (done()) {
// Will not hit any more columns in this storefile // Will not hit any more columns in this storefile
this.column = null; this.column = null;

View File

@ -395,4 +395,10 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner
KeyValueScanner getCurrentForTesting() { KeyValueScanner getCurrentForTesting() {
return current; return current;
} }
/**
 * @return the next indexed key reported by the scanner currently at the top of
 * this heap, or null when there is no current scanner
 */
@Override
public Cell getNextIndexedKey() {
  // Nothing at the top of the heap: there is no index key to report.
  if (current == null) {
    return null;
  }
  // Otherwise the answer comes from the top scanner.
  return current.getNextIndexedKey();
}
} }

View File

@ -156,4 +156,10 @@ public interface KeyValueScanner {
* @throws IOException * @throws IOException
*/ */
public boolean seekToLastRow() throws IOException; public boolean seekToLastRow() throws IOException;
/**
* @return the next key in the index (the key to seek to the next block)
* if known, or null otherwise
*/
public Cell getNextIndexedKey();
} }

View File

@ -67,4 +67,8 @@ public abstract class NonLazyKeyValueScanner implements KeyValueScanner {
// Not a file by default. // Not a file by default.
return false; return false;
} }
/**
* Default implementation: no next indexed key is known.
* @return always null
*/
@Override
public Cell getNextIndexedKey() {
return null;
}
} }

View File

@ -22,6 +22,7 @@ package org.apache.hadoop.hbase.regionserver;
import java.io.IOException; import java.io.IOException;
import java.util.NavigableSet; import java.util.NavigableSet;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.CellUtil;
@ -204,9 +205,8 @@ public class ScanQueryMatcher {
// We can share the ExplicitColumnTracker, diff is we reset // We can share the ExplicitColumnTracker, diff is we reset
// between rows, not between storefiles. // between rows, not between storefiles.
byte[] attr = scan.getAttribute(Scan.HINT_LOOKAHEAD);
this.columns = new ExplicitColumnTracker(columns, scanInfo.getMinVersions(), maxVersions, this.columns = new ExplicitColumnTracker(columns, scanInfo.getMinVersions(), maxVersions,
oldestUnexpiredTS, attr == null ? 0 : Bytes.toInt(attr)); oldestUnexpiredTS);
} }
this.isReversed = scan.isReversed(); this.isReversed = scan.isReversed();
} }
@ -577,6 +577,45 @@ public class ScanQueryMatcher {
null, 0, 0); null, 0, 0);
} }
/**
* Compares the next indexed key against the key that a seek to the next row
* would use for the passed cell.
* <p>
* The key compared against is built from kv's row with no family or qualifier,
* {@code HConstants.OLDEST_TIMESTAMP} and {@code Type.Minimum} -- presumably the
* last possible key of that row.
* @param nextIndexed the key of the next entry in the block index (if any)
* @param kv The Cell we're using to calculate the seek key
* @return result of the compare between the indexed key and the key portion of the passed cell
*/
public int compareKeyForNextRow(Cell nextIndexed, Cell kv) {
return rowComparator.compareKey(nextIndexed,
kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
null, 0, 0,
null, 0, 0,
HConstants.OLDEST_TIMESTAMP, Type.Minimum.getCode());
}
/**
* Compares the next indexed key against the key that a seek to the next column
* would use for the passed cell.
* <p>
* When the column tracker provides no column hint, the key compared against uses
* kv's own row, family and qualifier with {@code HConstants.OLDEST_TIMESTAMP} and
* {@code Type.Minimum}. When a hint is available, the hinted column's qualifier is
* used instead, with {@code HConstants.LATEST_TIMESTAMP} and {@code Type.Maximum}.
* @param nextIndexed the key of the next entry in the block index (if any)
* @param kv The Cell we're using to calculate the seek key
* @return result of the compare between the indexed key and the key portion of the passed cell
*/
public int compareKeyForNextColumn(Cell nextIndexed, Cell kv) {
ColumnCount nextColumn = columns.getColumnHint();
if (nextColumn == null) {
// No hint: compare against kv's current column (presumably its last possible key).
return rowComparator.compareKey(nextIndexed,
kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength(),
HConstants.OLDEST_TIMESTAMP, Type.Minimum.getCode());
} else {
// Hint available: compare against the hinted column (presumably its first possible key).
return rowComparator.compareKey(nextIndexed,
kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength(),
HConstants.LATEST_TIMESTAMP, Type.Maximum.getCode());
}
}
//Used only for testing purposes //Used only for testing purposes
static MatchCode checkColumn(ColumnTracker columnTracker, byte[] bytes, int offset, static MatchCode checkColumn(ColumnTracker columnTracker, byte[] bytes, int offset,
int length, long ttl, byte type, boolean ignoreCount) throws IOException { int length, long ttl, byte type, boolean ignoreCount) throws IOException {

View File

@ -484,4 +484,9 @@ public class StoreFileScanner implements KeyValueScanner {
} }
return true; return true;
} }
/**
* @return the next indexed key as reported by the wrapped scanner {@code hfs}
*/
@Override
public Cell getNextIndexedKey() {
return hfs.getNextIndexedKey();
}
} }

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.client.IsolationLevel;
import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
import org.apache.hadoop.hbase.regionserver.handler.ParallelSeekHandler; import org.apache.hadoop.hbase.regionserver.handler.ParallelSeekHandler;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@ -494,6 +495,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
prevCell = cell; prevCell = cell;
ScanQueryMatcher.MatchCode qcode = matcher.match(cell); ScanQueryMatcher.MatchCode qcode = matcher.match(cell);
qcode = optimize(qcode, cell);
switch(qcode) { switch(qcode) {
case INCLUDE: case INCLUDE:
case INCLUDE_AND_SEEK_NEXT_ROW: case INCLUDE_AND_SEEK_NEXT_ROW:
@ -596,6 +598,38 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
} }
} }
/*
 * Decide whether a SEEK hint from the matcher should be downgraded to a plain
 * SKIP/INCLUDE, i.e. whether to just step to the next Cell instead of seeking.
 * (see HBASE-13109)
 *
 * The hint is downgraded when the next indexed key compares >= the key the seek
 * would target. Any other match code, or a missing/unknown next indexed key,
 * leaves the code untouched.
 */
private ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) {
  final Cell indexedKey = getNextIndexedKey();
  final boolean indexedKeyKnown =
      indexedKey != null && indexedKey != HConstants.NO_NEXT_INDEXED_KEY;
  if (!indexedKeyKnown || store == null) {
    return qcode;
  }

  // Work out, per kind of seek, whether the next indexed key is at or past the seek target.
  final boolean indexedKeyNotBeforeTarget;
  switch (qcode) {
    case INCLUDE_AND_SEEK_NEXT_COL:
    case SEEK_NEXT_COL:
      indexedKeyNotBeforeTarget = matcher.compareKeyForNextColumn(indexedKey, cell) >= 0;
      break;
    case INCLUDE_AND_SEEK_NEXT_ROW:
    case SEEK_NEXT_ROW:
      indexedKeyNotBeforeTarget = matcher.compareKeyForNextRow(indexedKey, cell) >= 0;
      break;
    default:
      // Not a seek hint: nothing to optimize.
      return qcode;
  }

  if (!indexedKeyNotBeforeTarget) {
    return qcode;
  }
  // Pure seeks become SKIP; include-and-seek codes keep the include part only.
  if (qcode == MatchCode.SEEK_NEXT_COL || qcode == MatchCode.SEEK_NEXT_ROW) {
    return MatchCode.SKIP;
  }
  return MatchCode.INCLUDE;
}
@Override @Override
public boolean next(List<Cell> outResult) throws IOException { public boolean next(List<Cell> outResult) throws IOException {
return next(outResult, -1); return next(outResult, -1);
@ -799,5 +833,10 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
public long getEstimatedNumberOfKvsScanned() { public long getEstimatedNumberOfKvsScanned() {
return this.kvsScanned; return this.kvsScanned;
} }
/**
* @return the next indexed key as reported by this scanner's {@code heap}
*/
@Override
public Cell getNextIndexedKey() {
return this.heap.getNextIndexedKey();
}
} }

View File

@ -271,7 +271,7 @@ public class TestBlocksRead extends HBaseTestCase {
} }
/** /**
* Test # of blocks read (targetted at some of the cases Lazy Seek optimizes). * Test # of blocks read (targeted at some of the cases Lazy Seek optimizes).
* *
* @throws Exception * @throws Exception
*/ */
@ -356,8 +356,8 @@ public class TestBlocksRead extends HBaseTestCase {
putData(FAMILY, "row", "col3", 9); putData(FAMILY, "row", "col3", 9);
region.flushcache(); region.flushcache();
// Baseline expected blocks read: 8. [HBASE-4532] // Baseline expected blocks read: 6. [HBASE-4532]
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 5); kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 6, 7, 7);
assertEquals(0, kvs.length); assertEquals(0, kvs.length);
// File 7: Put back new data // File 7: Put back new data
@ -367,8 +367,8 @@ public class TestBlocksRead extends HBaseTestCase {
region.flushcache(); region.flushcache();
// Expected blocks read: 5. [HBASE-4585] // Expected blocks read: 8. [HBASE-4585, HBASE-13109]
kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 5); kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 8, 9, 9);
assertEquals(3, kvs.length); assertEquals(3, kvs.length);
verifyData(kvs[0], "row", "col1", 11); verifyData(kvs[0], "row", "col1", 11);
verifyData(kvs[1], "row", "col2", 12); verifyData(kvs[1], "row", "col2", 12);

View File

@ -48,9 +48,9 @@ public class TestExplicitColumnTracker {
private void runTest(int maxVersions, private void runTest(int maxVersions,
TreeSet<byte[]> trackColumns, TreeSet<byte[]> trackColumns,
List<byte[]> scannerColumns, List<byte[]> scannerColumns,
List<MatchCode> expected, int lookAhead) throws IOException { List<MatchCode> expected) throws IOException {
ColumnTracker exp = new ExplicitColumnTracker( ColumnTracker exp = new ExplicitColumnTracker(
trackColumns, 0, maxVersions, Long.MIN_VALUE, lookAhead); trackColumns, 0, maxVersions, Long.MIN_VALUE);
//Initialize result //Initialize result
@ -92,7 +92,7 @@ public class TestExplicitColumnTracker {
scanner.add(col4); scanner.add(col4);
scanner.add(col5); scanner.add(col5);
runTest(maxVersions, columns, scanner, expected, 0); runTest(maxVersions, columns, scanner, expected);
} }
@Test @Test
@ -144,59 +144,7 @@ public class TestExplicitColumnTracker {
scanner.add(col5); scanner.add(col5);
//Initialize result //Initialize result
runTest(maxVersions, columns, scanner, expected, 0); runTest(maxVersions, columns, scanner, expected);
}
@Test
public void testGet_MultiVersionWithLookAhead() throws IOException{
//Create tracker
TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
//Looking for every other
columns.add(col2);
columns.add(col4);
List<ScanQueryMatcher.MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>();
expected.add(ScanQueryMatcher.MatchCode.SKIP);
expected.add(ScanQueryMatcher.MatchCode.SKIP);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); // col2; 1st version
expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL); // col2; 2nd version
expected.add(ScanQueryMatcher.MatchCode.SKIP);
expected.add(ScanQueryMatcher.MatchCode.SKIP);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.INCLUDE); // col4; 1st version
expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW); // col4; 2nd version
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW);
expected.add(ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW);
int maxVersions = 2;
//Create "Scanner"
List<byte[]> scanner = new ArrayList<byte[]>();
scanner.add(col1);
scanner.add(col1);
scanner.add(col1);
scanner.add(col2);
scanner.add(col2);
scanner.add(col2);
scanner.add(col3);
scanner.add(col3);
scanner.add(col3);
scanner.add(col4);
scanner.add(col4);
scanner.add(col4);
scanner.add(col5);
scanner.add(col5);
scanner.add(col5);
//Initialize result
runTest(maxVersions, columns, scanner, expected, 2);
} }
/** /**
@ -211,7 +159,7 @@ public class TestExplicitColumnTracker {
} }
ColumnTracker explicit = new ExplicitColumnTracker(columns, 0, maxVersions, ColumnTracker explicit = new ExplicitColumnTracker(columns, 0, maxVersions,
Long.MIN_VALUE, 0); Long.MIN_VALUE);
for (int i = 0; i < 100000; i+=2) { for (int i = 0; i < 100000; i+=2) {
byte [] col = Bytes.toBytes("col"+i); byte [] col = Bytes.toBytes("col"+i);
ScanQueryMatcher.checkColumn(explicit, col, 0, col.length, 1, KeyValue.Type.Put.getCode(), ScanQueryMatcher.checkColumn(explicit, col, 0, col.length, 1, KeyValue.Type.Put.getCode(),
@ -240,7 +188,7 @@ public class TestExplicitColumnTracker {
new ScanQueryMatcher.MatchCode[] { new ScanQueryMatcher.MatchCode[] {
ScanQueryMatcher.MatchCode.SEEK_NEXT_COL, ScanQueryMatcher.MatchCode.SEEK_NEXT_COL,
ScanQueryMatcher.MatchCode.SEEK_NEXT_COL }); ScanQueryMatcher.MatchCode.SEEK_NEXT_COL });
runTest(1, columns, scanner, expected, 0); runTest(1, columns, scanner, expected);
} }
} }

View File

@ -147,27 +147,6 @@ public class TestQueryMatcher extends HBaseTestCase {
_testMatch_ExplicitColumns(scan, expected); _testMatch_ExplicitColumns(scan, expected);
} }
@Test
public void testMatch_ExplicitColumnsWithLookAhead()
throws IOException {
//Moving up from the Tracker by using Gets and List<KeyValue> instead
//of just byte []
//Expected result
List<MatchCode> expected = new ArrayList<ScanQueryMatcher.MatchCode>();
expected.add(ScanQueryMatcher.MatchCode.SKIP);
expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.SKIP);
expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL);
expected.add(ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW);
expected.add(ScanQueryMatcher.MatchCode.DONE);
Scan s = new Scan(scan);
s.setAttribute(Scan.HINT_LOOKAHEAD, Bytes.toBytes(2));
_testMatch_ExplicitColumns(s, expected);
}
@Test @Test
public void testMatch_Wildcard() public void testMatch_Wildcard()
throws IOException { throws IOException {