HBASE-15392 Single Cell Get reads two HFileBlocks
M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java moreRowsMayExistAfterCell: Exploit the fact that a Scan is a Get Scan. Also save compares if there is no non-default stopRow. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java optimize: Add doc on what is being optimized. Also, if a Get Scan, do not optimize, else we'll keep going after our row is DONE. Another place to make use of the Get Scan fact is when we are DONE: if Get Scan, we can close out the scan. Signed-off-by: stack <stack@apache.org>
This commit is contained in:
parent
5e552e57a5
commit
27446a5c4a
|
@ -2035,6 +2035,11 @@ public class KeyValue implements Cell, HeapSize, Cloneable, SettableSequenceId,
|
|||
right.getRowArray(), right.getRowOffset(), right.getRowLength());
|
||||
}
|
||||
|
||||
public int compareRows(Cell left, byte[] right, int roffset, int rlength) {
|
||||
return compareRows(left.getRowArray(), left.getRowOffset(), left.getRowLength(), right,
|
||||
roffset, rlength);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the b[],o,l for left and right rowkey portions and compare.
|
||||
* @param left
|
||||
|
|
|
@ -61,8 +61,8 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
|
|||
@Override
|
||||
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory,
|
||||
final boolean cacheDataInL1) {
|
||||
boolean isMetaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA;
|
||||
if (isMetaBlock || cacheDataInL1) {
|
||||
boolean metaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA;
|
||||
if (metaBlock || cacheDataInL1) {
|
||||
lruCache.cacheBlock(cacheKey, buf, inMemory, cacheDataInL1);
|
||||
} else {
|
||||
l2Cache.cacheBlock(cacheKey, buf, inMemory, false);
|
||||
|
@ -79,12 +79,9 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
|
|||
boolean repeat, boolean updateCacheMetrics) {
|
||||
// TODO: is there a hole here, or just awkwardness since in the lruCache getBlock
|
||||
// we end up calling l2Cache.getBlock.
|
||||
if (lruCache.containsBlock(cacheKey)) {
|
||||
return lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
|
||||
}
|
||||
Cacheable result = l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
|
||||
|
||||
return result;
|
||||
return lruCache.containsBlock(cacheKey)?
|
||||
lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics):
|
||||
l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -29,6 +29,9 @@ import org.apache.hadoop.hbase.client.Scan;
|
|||
* Scanner that returns the next KeyValue.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner
|
||||
// so this should be something else altogether, a decoration on our base CellScanner. TODO.
|
||||
// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0.
|
||||
public interface KeyValueScanner {
|
||||
/**
|
||||
* The byte array that represents NO_NEXT_INDEXED_KEY;
|
||||
|
@ -134,11 +137,11 @@ public interface KeyValueScanner {
|
|||
* peek KeyValue of scanner has the same row with specified Cell,
|
||||
* otherwise seek the scanner at the first Cell of the row which is the
|
||||
* previous row of specified KeyValue
|
||||
*
|
||||
*
|
||||
* @param key seek KeyValue
|
||||
* @return true if the scanner is at the valid KeyValue, false if such
|
||||
* KeyValue does not exist
|
||||
*
|
||||
*
|
||||
*/
|
||||
public boolean backwardSeek(Cell key) throws IOException;
|
||||
|
||||
|
@ -153,7 +156,7 @@ public interface KeyValueScanner {
|
|||
|
||||
/**
|
||||
* Seek the scanner at the first KeyValue of last row
|
||||
*
|
||||
*
|
||||
* @return true if scanner has values left, false if the underlying data is
|
||||
* empty
|
||||
* @throws IOException
|
||||
|
@ -161,8 +164,9 @@ public interface KeyValueScanner {
|
|||
public boolean seekToLastRow() throws IOException;
|
||||
|
||||
/**
|
||||
* @return the next key in the index (the key to seek to the next block)
|
||||
* if known, or null otherwise
|
||||
* @return the next key in the index, usually the first key of next block OR a key that falls
|
||||
* between last key of current block and first key of next block;
|
||||
* see HFileWriterImpl#getMidpoint, or null if not known.
|
||||
*/
|
||||
public Cell getNextIndexedKey();
|
||||
}
|
||||
|
|
|
@ -93,7 +93,7 @@ public class ScanQueryMatcher {
|
|||
byte [] row;
|
||||
int rowOffset;
|
||||
short rowLength;
|
||||
|
||||
|
||||
/**
|
||||
* Oldest put in any of the involved store files
|
||||
* Used to decide whether it is ok to delete
|
||||
|
@ -119,7 +119,7 @@ public class ScanQueryMatcher {
|
|||
* first column.
|
||||
* */
|
||||
private boolean hasNullColumn = true;
|
||||
|
||||
|
||||
private RegionCoprocessorHost regionCoprocessorHost= null;
|
||||
|
||||
// By default, when hbase.hstore.time.to.purge.deletes is 0ms, a delete
|
||||
|
@ -140,22 +140,22 @@ public class ScanQueryMatcher {
|
|||
// currently influencing. This is because Puts, that this delete can
|
||||
// influence. may appear out of order.
|
||||
private final long timeToPurgeDeletes;
|
||||
|
||||
|
||||
private final boolean isUserScan;
|
||||
|
||||
private final boolean isReversed;
|
||||
|
||||
/**
|
||||
* True if we are doing a 'Get' Scan. Every Get is actually a one-row Scan.
|
||||
*/
|
||||
private final boolean get;
|
||||
|
||||
/**
|
||||
* Construct a QueryMatcher for a scan
|
||||
* @param scan
|
||||
* @param scanInfo The store's immutable scan info
|
||||
* @param columns
|
||||
* @param scanType Type of the scan
|
||||
* @param earliestPutTs Earliest put seen in any of the store files.
|
||||
* @param oldestUnexpiredTS the oldest timestamp we are interested in,
|
||||
* based on TTL
|
||||
* @param regionCoprocessorHost
|
||||
* @throws IOException
|
||||
* @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL
|
||||
*/
|
||||
public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns,
|
||||
ScanType scanType, long readPointToUse, long earliestPutTs, long oldestUnexpiredTS,
|
||||
|
@ -166,6 +166,7 @@ public class ScanQueryMatcher {
|
|||
} else {
|
||||
this.tr = timeRange;
|
||||
}
|
||||
this.get = scan.isGetScan();
|
||||
this.rowComparator = scanInfo.getComparator();
|
||||
this.regionCoprocessorHost = regionCoprocessorHost;
|
||||
this.deletes = instantiateDeleteTracker();
|
||||
|
@ -234,8 +235,8 @@ public class ScanQueryMatcher {
|
|||
* @param now the current server time
|
||||
* @param dropDeletesFromRow The inclusive left bound of the range; can be EMPTY_START_ROW.
|
||||
* @param dropDeletesToRow The exclusive right bound of the range; can be EMPTY_END_ROW.
|
||||
* @param regionCoprocessorHost
|
||||
* @throws IOException
|
||||
* @param regionCoprocessorHost
|
||||
* @throws IOException
|
||||
*/
|
||||
public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns,
|
||||
long readPointToUse, long earliestPutTs, long oldestUnexpiredTS, long now,
|
||||
|
@ -280,7 +281,7 @@ public class ScanQueryMatcher {
|
|||
* caused by a data corruption.
|
||||
*/
|
||||
public MatchCode match(Cell cell) throws IOException {
|
||||
if (filter != null && filter.filterAllRemaining()) {
|
||||
if (filter != null && filter.filterAllRemaining()) {
|
||||
return MatchCode.DONE_SCAN;
|
||||
}
|
||||
if (row != null) {
|
||||
|
@ -327,7 +328,7 @@ public class ScanQueryMatcher {
|
|||
// check if the cell is expired by cell TTL
|
||||
if (HStore.isCellTTLExpired(cell, this.oldestUnexpiredTS, this.now)) {
|
||||
return MatchCode.SKIP;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The delete logic is pretty complicated now.
|
||||
|
@ -362,10 +363,10 @@ public class ScanQueryMatcher {
|
|||
}
|
||||
// Can't early out now, because DelFam come before any other keys
|
||||
}
|
||||
|
||||
|
||||
if ((!isUserScan)
|
||||
&& timeToPurgeDeletes > 0
|
||||
&& (EnvironmentEdgeManager.currentTime() - timestamp)
|
||||
&& (EnvironmentEdgeManager.currentTime() - timestamp)
|
||||
<= timeToPurgeDeletes) {
|
||||
return MatchCode.INCLUDE;
|
||||
} else if (retainDeletesInOutput || mvccVersion > maxReadPointToTrackVersions) {
|
||||
|
@ -417,7 +418,7 @@ public class ScanQueryMatcher {
|
|||
}
|
||||
|
||||
// STEP 1: Check if the column is part of the requested columns
|
||||
MatchCode colChecker = columns.checkColumn(cell.getQualifierArray(),
|
||||
MatchCode colChecker = columns.checkColumn(cell.getQualifierArray(),
|
||||
qualifierOffset, qualifierLength, typeByte);
|
||||
if (colChecker == MatchCode.INCLUDE) {
|
||||
ReturnCode filterResponse = ReturnCode.SKIP;
|
||||
|
@ -429,7 +430,7 @@ public class ScanQueryMatcher {
|
|||
case SKIP:
|
||||
return MatchCode.SKIP;
|
||||
case NEXT_COL:
|
||||
return columns.getNextRowOrNextColumn(cell.getQualifierArray(),
|
||||
return columns.getNextRowOrNextColumn(cell.getQualifierArray(),
|
||||
qualifierOffset, qualifierLength);
|
||||
case NEXT_ROW:
|
||||
stickyNextRow = true;
|
||||
|
@ -502,24 +503,27 @@ public class ScanQueryMatcher {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns false if we know there are no more rows to be scanned (we've reached the
|
||||
* <code>stopRow</code> or we are scanning one row only because this Scan is for a Get, etc.).
|
||||
*/
|
||||
public boolean moreRowsMayExistAfter(Cell kv) {
|
||||
if (this.isReversed) {
|
||||
if (rowComparator.compareRows(kv.getRowArray(), kv.getRowOffset(),
|
||||
kv.getRowLength(), stopRow, 0, stopRow.length) <= 0) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
|
||||
rowComparator.compareRows(kv.getRowArray(),kv.getRowOffset(),
|
||||
kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
|
||||
// KV >= STOPROW
|
||||
// then NO there is nothing left.
|
||||
// If a 'get' Scan -- we are doing a Get (every Get is a single-row Scan in implementation) --
|
||||
// then we are looking at one row only, the one specified in the Get coordinate, so we know
|
||||
// for sure that there are no more rows on this Scan
|
||||
if (this.get) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
// If no stopRow, return that there may be more rows. The tests that follow depend on a
|
||||
// non-empty, non-default stopRow so this little test below short-circuits out doing the
|
||||
// following compares.
|
||||
if (this.stopRow == null || this.stopRow == HConstants.EMPTY_BYTE_ARRAY) {
|
||||
return true;
|
||||
}
|
||||
return this.isReversed?
|
||||
rowComparator.compareRows(kv, stopRow, 0, stopRow.length) > 0:
|
||||
Bytes.equals(stopRow, HConstants.EMPTY_END_ROW) ||
|
||||
rowComparator.compareRows(kv, stopRow, 0, stopRow.length) < 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -129,7 +129,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
|||
private List<KeyValueScanner> currentScanners = new ArrayList<KeyValueScanner>();
|
||||
// flush update lock
|
||||
private ReentrantLock flushLock = new ReentrantLock();
|
||||
|
||||
|
||||
private final long readPt;
|
||||
|
||||
// used by the injection framework to test race between StoreScanner construction and compaction
|
||||
|
@ -294,7 +294,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
|||
// 0 is passed as readpoint because the test bypasses Store
|
||||
0);
|
||||
}
|
||||
|
||||
|
||||
private StoreScanner(final Scan scan, ScanInfo scanInfo,
|
||||
ScanType scanType, final NavigableSet<byte[]> columns,
|
||||
final List<KeyValueScanner> scanners, long earliestPutTs, long readPt)
|
||||
|
@ -495,7 +495,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
|||
|
||||
// Clear progress away unless invoker has indicated it should be kept.
|
||||
if (!scannerContext.getKeepProgress()) scannerContext.clearProgress();
|
||||
|
||||
|
||||
// Only do a sanity-check if store and comparator are available.
|
||||
KeyValue.KVComparator comparator =
|
||||
store != null ? store.getComparator() : null;
|
||||
|
@ -587,6 +587,12 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
|||
continue;
|
||||
|
||||
case DONE:
|
||||
// Optimization for Gets! If DONE, no more to get on this row, early exit!
|
||||
if (this.scan.isGetScan()) {
|
||||
// Then no more to this row... exit.
|
||||
close();// Do all cleanup except heap.close()
|
||||
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
|
||||
}
|
||||
// We are sure that this row is done and we are in the next row.
|
||||
// So subsequent StoreScanner.next() call need not do another compare
|
||||
// and set the matcher.row
|
||||
|
@ -642,11 +648,60 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
|||
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
|
||||
}
|
||||
|
||||
/*
|
||||
* See if we should actually SEEK or rather just SKIP to the next Cell.
|
||||
* (see HBASE-13109)
|
||||
/**
|
||||
* See if we should actually SEEK or rather just SKIP to the next Cell (see HBASE-13109).
|
||||
* This method works together with ColumnTrackers and Filters. ColumnTrackers may issue SEEK
|
||||
* hints, such as seek to next column, next row, or seek to an arbitrary seek key.
|
||||
* This method intercepts these qcodes and decides whether a seek is the most efficient _actual_
|
||||
* way to get us to the requested cell (SEEKs are more expensive than SKIP, SKIP, SKIP inside the
|
||||
* current, loaded block).
|
||||
* It does this by looking at the next indexed key of the current HFile. This key
|
||||
* is then compared with the _SEEK_ key, where a SEEK key is an artificial 'last possible key
|
||||
* on the row' (only in here, we avoid actually creating a SEEK key; in the compare we work with
|
||||
* the current Cell but compare as though it were a seek key; see down in
|
||||
* matcher.compareKeyForNextRow, etc). If the compare gets us onto the
|
||||
* next block we *_SEEK, otherwise we just INCLUDE or SKIP, and let the ColumnTrackers or Filters
|
||||
* go through the next Cell, and so on.
|
||||
*
|
||||
* <p>The ColumnTrackers and Filters must behave correctly in all cases, i.e. if they are past the
|
||||
* Cells they care about they must issue a SKIP or SEEK.
|
||||
*
|
||||
* <p>Other notes:
|
||||
* <ul>
|
||||
* <li>Rows can straddle block boundaries</li>
|
||||
* <li>Versions of columns can straddle block boundaries (i.e. column C1 at T1 might be in a
|
||||
* different block than column C1 at T2)</li>
|
||||
* <li>We want to SKIP and INCLUDE if the chance is high that we'll find the desired Cell after a
|
||||
* few SKIPs...</li>
|
||||
* <li>We want to INCLUDE_AND_SEEK and SEEK when the chance is high that we'll be able to seek
|
||||
* past many Cells, especially if we know we need to go to the next block.</li>
|
||||
* </ul>
|
||||
* <p>A good proxy (best effort) to determine whether INCLUDE/SKIP is better than SEEK is whether
|
||||
* we'll likely end up seeking to the next block (or past the next block) to get our next column.
|
||||
* Example:
|
||||
* <pre>
|
||||
* | BLOCK 1 | BLOCK 2 |
|
||||
* | r1/c1, r1/c2, r1/c3 | r1/c4, r1/c5, r2/c1 |
|
||||
* ^ ^
|
||||
* | |
|
||||
* Next Index Key SEEK_NEXT_ROW (before r2/c1)
|
||||
*
|
||||
*
|
||||
* | BLOCK 1 | BLOCK 2 |
|
||||
* | r1/c1/t5, r1/c1/t4, r1/c1/t3 | r1/c1/t2, r1/c1/T1, r1/c2/T3 |
|
||||
* ^ ^
|
||||
* | |
|
||||
* Next Index Key SEEK_NEXT_COL
|
||||
* </pre>
|
||||
* Now imagine we want columns c1 and c3 (see first diagram above), the 'Next Index Key' of r1/c4
|
||||
* is > r1/c3 so we should seek to get to the c1 on the next row, r2. In second case, say we only
|
||||
* want one version of c1, after we have it, a SEEK_NEXT_COL will be issued to get to c2. Looking at
|
||||
* the 'Next Index Key', it would land us in the next block, so we should SEEK. In other scenarios
|
||||
* where the SEEK will not land us in the next block, it is very likely better to issue a series
|
||||
* of SKIPs.
|
||||
*/
|
||||
private ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) {
|
||||
@VisibleForTesting
|
||||
protected ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) {
|
||||
switch(qcode) {
|
||||
case INCLUDE_AND_SEEK_NEXT_COL:
|
||||
case SEEK_NEXT_COL:
|
||||
|
@ -661,10 +716,16 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
|||
case INCLUDE_AND_SEEK_NEXT_ROW:
|
||||
case SEEK_NEXT_ROW:
|
||||
{
|
||||
Cell nextIndexedKey = getNextIndexedKey();
|
||||
if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY
|
||||
&& matcher.compareKeyForNextRow(nextIndexedKey, cell) >= 0) {
|
||||
return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : MatchCode.INCLUDE;
|
||||
// If it is a Get Scan, then we know that we are done with this row; there are no more
|
||||
// rows beyond the current one: don't try to optimize. We are DONE. Return the *_NEXT_ROW
|
||||
// qcode as is. When the caller gets these flags on a Get Scan, it knows it can shut down the
|
||||
// Scan.
|
||||
if (!this.scan.isGetScan()) {
|
||||
Cell nextIndexedKey = getNextIndexedKey();
|
||||
if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY
|
||||
&& matcher.compareKeyForNextRow(nextIndexedKey, cell) > 0) {
|
||||
return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : MatchCode.INCLUDE;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -828,10 +889,10 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
|||
// check the var without any lock. Suppose even if we see the old
|
||||
// value here still it is ok to continue because we will not be resetting
|
||||
// the heap but will continue with the referenced memstore's snapshot. For compactions
|
||||
// any way we don't need the updateReaders at all to happen as we still continue with
|
||||
// any way we don't need the updateReaders at all to happen as we still continue with
|
||||
// the older files
|
||||
if (flushed) {
|
||||
// If there is a flush and the current scan is notified on the flush ensure that the
|
||||
// If there is a flush and the current scan is notified on the flush ensure that the
|
||||
// scan's heap gets reset and we do a seek on the newly flushed file.
|
||||
if(!this.closing) {
|
||||
this.lastTop = this.peek();
|
||||
|
@ -861,7 +922,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
|||
if (scanners.isEmpty()) return;
|
||||
int storeFileScannerCount = scanners.size();
|
||||
CountDownLatch latch = new CountDownLatch(storeFileScannerCount);
|
||||
List<ParallelSeekHandler> handlers =
|
||||
List<ParallelSeekHandler> handlers =
|
||||
new ArrayList<ParallelSeekHandler>(storeFileScannerCount);
|
||||
for (KeyValueScanner scanner : scanners) {
|
||||
if (scanner instanceof StoreFileScanner) {
|
||||
|
|
|
@ -30,8 +30,7 @@ import org.apache.hadoop.hbase.KeyValue;
|
|||
import org.apache.hadoop.hbase.regionserver.NonReversedNonLazyKeyValueScanner;
|
||||
|
||||
/**
|
||||
* Utility scanner that wraps a sortable collection and serves
|
||||
* as a KeyValueScanner.
|
||||
* Utility scanner that wraps a sortable collection and serves as a KeyValueScanner.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class CollectionBackedScanner extends NonReversedNonLazyKeyValueScanner {
|
||||
|
|
|
@ -33,8 +33,7 @@ import java.util.List;
|
|||
* to be a store file scanner.
|
||||
*/
|
||||
public class KeyValueScanFixture extends CollectionBackedScanner {
|
||||
public KeyValueScanFixture(KeyValue.KVComparator comparator,
|
||||
KeyValue... incData) {
|
||||
public KeyValueScanFixture(KeyValue.KVComparator comparator, KeyValue... incData) {
|
||||
super(comparator, incData);
|
||||
}
|
||||
|
||||
|
|
|
@ -44,8 +44,7 @@ public class TestKeyValueScanFixture extends TestCase {
|
|||
KeyValueTestUtil.create("RowB", "family", "qf1",
|
||||
10, KeyValue.Type.Put, "value-10")
|
||||
};
|
||||
KeyValueScanner scan = new KeyValueScanFixture(
|
||||
KeyValue.COMPARATOR, kvs);
|
||||
KeyValueScanner scan = new KeyValueScanFixture(KeyValue.COMPARATOR, kvs);
|
||||
|
||||
KeyValue kv = KeyValueUtil.createFirstOnRow(Bytes.toBytes("RowA"));
|
||||
// should seek to this:
|
||||
|
|
Loading…
Reference in New Issue