Revert "HBASE-15392 Single Cell Get reads two HFileBlocks M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java moreRowsMayExistAfterCell Exploit the fact a Scan is a Get Scan. Also save compares if no non-default stopRow."
Revert mistaken commit
This reverts commit 7073f69993.
This commit is contained in:
parent
c49d0caf53
commit
a13d6e000d
|
@ -433,7 +433,7 @@ public class CellComparator implements Comparator<Cell>, Serializable {
|
||||||
/**
|
/**
|
||||||
* Used to compare two cells based on the column hint provided. This is specifically
|
* Used to compare two cells based on the column hint provided. This is specifically
|
||||||
* used when we need to optimize the seeks based on the next indexed key. This is an
|
* used when we need to optimize the seeks based on the next indexed key. This is an
|
||||||
* advanced usage API specifically needed for some optimizations.
|
* advance usage API specifically needed for some optimizations.
|
||||||
* @param nextIndexedCell the next indexed cell
|
* @param nextIndexedCell the next indexed cell
|
||||||
* @param currentCell the cell to be compared
|
* @param currentCell the cell to be compared
|
||||||
* @param foff the family offset of the currentCell
|
* @param foff the family offset of the currentCell
|
||||||
|
|
|
@ -63,8 +63,8 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
|
||||||
@Override
|
@Override
|
||||||
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory,
|
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory,
|
||||||
final boolean cacheDataInL1) {
|
final boolean cacheDataInL1) {
|
||||||
boolean metaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA;
|
boolean isMetaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA;
|
||||||
if (metaBlock || cacheDataInL1) {
|
if (isMetaBlock || cacheDataInL1) {
|
||||||
lruCache.cacheBlock(cacheKey, buf, inMemory, cacheDataInL1);
|
lruCache.cacheBlock(cacheKey, buf, inMemory, cacheDataInL1);
|
||||||
} else {
|
} else {
|
||||||
l2Cache.cacheBlock(cacheKey, buf, inMemory, false);
|
l2Cache.cacheBlock(cacheKey, buf, inMemory, false);
|
||||||
|
@ -81,9 +81,12 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
|
||||||
boolean repeat, boolean updateCacheMetrics) {
|
boolean repeat, boolean updateCacheMetrics) {
|
||||||
// TODO: is there a hole here, or just awkwardness since in the lruCache getBlock
|
// TODO: is there a hole here, or just awkwardness since in the lruCache getBlock
|
||||||
// we end up calling l2Cache.getBlock.
|
// we end up calling l2Cache.getBlock.
|
||||||
return lruCache.containsBlock(cacheKey)?
|
if (lruCache.containsBlock(cacheKey)) {
|
||||||
lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics):
|
return lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
|
||||||
l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
|
}
|
||||||
|
Cacheable result = l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -29,9 +29,6 @@ import org.apache.hadoop.hbase.client.Scan;
|
||||||
* Scanner that returns the next KeyValue.
|
* Scanner that returns the next KeyValue.
|
||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner
|
|
||||||
// so this should be something else altogether, a decoration on our base CellScanner. TODO.
|
|
||||||
// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0.
|
|
||||||
public interface KeyValueScanner extends Shipper {
|
public interface KeyValueScanner extends Shipper {
|
||||||
/**
|
/**
|
||||||
* The byte array represents for NO_NEXT_INDEXED_KEY;
|
* The byte array represents for NO_NEXT_INDEXED_KEY;
|
||||||
|
@ -164,9 +161,8 @@ public interface KeyValueScanner extends Shipper {
|
||||||
public boolean seekToLastRow() throws IOException;
|
public boolean seekToLastRow() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the next key in the index, usually the first key of next block OR a key that falls
|
* @return the next key in the index (the key to seek to the next block)
|
||||||
* between last key of current block and first key of next block..
|
* if known, or null otherwise
|
||||||
* see HFileWriterImpl#getMidpoint, or null if not known.
|
|
||||||
*/
|
*/
|
||||||
public Cell getNextIndexedKey();
|
public Cell getNextIndexedKey();
|
||||||
}
|
}
|
|
@ -145,17 +145,17 @@ public class ScanQueryMatcher {
|
||||||
|
|
||||||
private final boolean isReversed;
|
private final boolean isReversed;
|
||||||
|
|
||||||
/**
|
|
||||||
* True if we are doing a 'Get' Scan. Every Get is actually a one-row Scan.
|
|
||||||
*/
|
|
||||||
private final boolean get;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a QueryMatcher for a scan
|
* Construct a QueryMatcher for a scan
|
||||||
|
* @param scan
|
||||||
* @param scanInfo The store's immutable scan info
|
* @param scanInfo The store's immutable scan info
|
||||||
|
* @param columns
|
||||||
* @param scanType Type of the scan
|
* @param scanType Type of the scan
|
||||||
* @param earliestPutTs Earliest put seen in any of the store files.
|
* @param earliestPutTs Earliest put seen in any of the store files.
|
||||||
* @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL
|
* @param oldestUnexpiredTS the oldest timestamp we are interested in,
|
||||||
|
* based on TTL
|
||||||
|
* @param regionCoprocessorHost
|
||||||
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns,
|
public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns,
|
||||||
ScanType scanType, long readPointToUse, long earliestPutTs, long oldestUnexpiredTS,
|
ScanType scanType, long readPointToUse, long earliestPutTs, long oldestUnexpiredTS,
|
||||||
|
@ -166,7 +166,6 @@ public class ScanQueryMatcher {
|
||||||
} else {
|
} else {
|
||||||
this.tr = timeRange;
|
this.tr = timeRange;
|
||||||
}
|
}
|
||||||
this.get = scan.isGetScan();
|
|
||||||
this.rowComparator = scanInfo.getComparator();
|
this.rowComparator = scanInfo.getComparator();
|
||||||
this.regionCoprocessorHost = regionCoprocessorHost;
|
this.regionCoprocessorHost = regionCoprocessorHost;
|
||||||
this.deletes = instantiateDeleteTracker();
|
this.deletes = instantiateDeleteTracker();
|
||||||
|
@ -281,7 +280,7 @@ public class ScanQueryMatcher {
|
||||||
* caused by a data corruption.
|
* caused by a data corruption.
|
||||||
*/
|
*/
|
||||||
public MatchCode match(Cell cell) throws IOException {
|
public MatchCode match(Cell cell) throws IOException {
|
||||||
if (filter != null && filter.filterAllRemaining()) {
|
if (filter != null && filter.filterAllRemaining()) {
|
||||||
return MatchCode.DONE_SCAN;
|
return MatchCode.DONE_SCAN;
|
||||||
}
|
}
|
||||||
if (curCell != null) {
|
if (curCell != null) {
|
||||||
|
@ -504,27 +503,22 @@ public class ScanQueryMatcher {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @return Returns false if we know there are no more rows to be scanned (We've reached the
|
|
||||||
* <code>stopRow</code> or we are scanning on row only because this Scan is for a Get, etc.
|
|
||||||
*/
|
|
||||||
public boolean moreRowsMayExistAfter(Cell kv) {
|
public boolean moreRowsMayExistAfter(Cell kv) {
|
||||||
// If a 'get' Scan -- we are doing a Get (every Get is a single-row Scan in implementation) --
|
if (this.isReversed) {
|
||||||
// then we are looking at one row only, the one specified in the Get coordinate..so we know
|
if (rowComparator.compareRows(kv, stopRow, 0, stopRow.length) <= 0) {
|
||||||
// for sure that there are no more rows on this Scan
|
return false;
|
||||||
if (this.get) {
|
} else {
|
||||||
return false;
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// If no stopRow, return that there may be more rows. The tests that follow depend on a
|
if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
|
||||||
// non-empty, non-default stopRow so this little test below short-circuits out doing the
|
rowComparator.compareRows(kv, stopRow, 0, stopRow.length) >= 0) {
|
||||||
// following compares.
|
// KV >= STOPROW
|
||||||
if (this.stopRow == null || this.stopRow == HConstants.EMPTY_BYTE_ARRAY) {
|
// then NO there is nothing left.
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return this.isReversed?
|
|
||||||
rowComparator.compareRows(kv, stopRow, 0, stopRow.length) > 0:
|
|
||||||
Bytes.equals(stopRow, HConstants.EMPTY_END_ROW) ||
|
|
||||||
rowComparator.compareRows(kv, stopRow, 0, stopRow.length) < 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -600,12 +600,6 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
case DONE:
|
case DONE:
|
||||||
// Optimization for Gets! If DONE, no more to get on this row, early exit!
|
|
||||||
if (this.scan.isGetScan()) {
|
|
||||||
// Then no more to this row... exit.
|
|
||||||
close(false);// Do all cleanup except heap.close()
|
|
||||||
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
|
|
||||||
}
|
|
||||||
matcher.curCell = null;
|
matcher.curCell = null;
|
||||||
return scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues();
|
return scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues();
|
||||||
|
|
||||||
|
@ -655,67 +649,18 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
||||||
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
|
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* See if we should actually SEEK or rather just SKIP to the next Cell (see HBASE-13109).
|
* See if we should actually SEEK or rather just SKIP to the next Cell.
|
||||||
* This method works together with ColumnTrackers and Filters. ColumnTrackers may issue SEEK
|
* (see HBASE-13109)
|
||||||
* hints, such as seek to next column, next row, or seek to an arbitrary seek key.
|
|
||||||
* This method intercepts these qcodes and decides whether a seek is the most efficient _actual_
|
|
||||||
* way to get us to the requested cell (SEEKs are more expensive than SKIP, SKIP, SKIP inside the
|
|
||||||
* current, loaded block).
|
|
||||||
* It does this by looking at the next indexed key of the current HFile. This key
|
|
||||||
* is then compared with the _SEEK_ key, where a SEEK key is an artificial 'last possible key
|
|
||||||
* on the row' (only in here, we avoid actually creating a SEEK key; in the compare we work with
|
|
||||||
* the current Cell but compare as though it were a seek key; see down in
|
|
||||||
* matcher.compareKeyForNextRow, etc). If the compare gets us onto the
|
|
||||||
* next block we *_SEEK, otherwise we just INCLUDE or SKIP, and let the ColumnTrackers or Filters
|
|
||||||
* go through the next Cell, and so on)
|
|
||||||
*
|
|
||||||
* <p>The ColumnTrackers and Filters must behave correctly in all cases, i.e. if they are past the
|
|
||||||
* Cells they care about they must issues a SKIP or SEEK.
|
|
||||||
*
|
|
||||||
* <p>Other notes:
|
|
||||||
* <ul>
|
|
||||||
* <li>Rows can straddle block boundaries</li>
|
|
||||||
* <li>Versions of columns can straddle block boundaries (i.e. column C1 at T1 might be in a
|
|
||||||
* different block than column C1 at T2)</li>
|
|
||||||
* <li>We want to SKIP and INCLUDE if the chance is high that we'll find the desired Cell after a
|
|
||||||
* few SKIPs...</li>
|
|
||||||
* <li>We want to INCLUDE_AND_SEEK and SEEK when the chance is high that we'll be able to seek
|
|
||||||
* past many Cells, especially if we know we need to go to the next block.</li>
|
|
||||||
* </ul>
|
|
||||||
* <p>A good proxy (best effort) to determine whether INCLUDE/SKIP is better than SEEK is whether
|
|
||||||
* we'll likely end up seeking to the next block (or past the next block) to get our next column.
|
|
||||||
* Example:
|
|
||||||
* <pre>
|
|
||||||
* | BLOCK 1 | BLOCK 2 |
|
|
||||||
* | r1/c1, r1/c2, r1/c3 | r1/c4, r1/c5, r2/c1 |
|
|
||||||
* ^ ^
|
|
||||||
* | |
|
|
||||||
* Next Index Key SEEK_NEXT_ROW (before r2/c1)
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* | BLOCK 1 | BLOCK 2 |
|
|
||||||
* | r1/c1/t5, r1/c1/t4, r1/c1/t3 | r1/c1/t2, r1/c1/T1, r1/c2/T3 |
|
|
||||||
* ^ ^
|
|
||||||
* | |
|
|
||||||
* Next Index Key SEEK_NEXT_COL
|
|
||||||
* </pre>
|
|
||||||
* Now imagine we want columns c1 and c3 (see first diagram above), the 'Next Index Key' of r1/c4
|
|
||||||
* is > r1/c3 so we should seek to get to the c1 on the next row, r2. In second case, say we only
|
|
||||||
* want one version of c1, after we have it, a SEEK_COL will be issued to get to c2. Looking at
|
|
||||||
* the 'Next Index Key', it would land us in the next block, so we should SEEK. In other scenarios
|
|
||||||
* where the SEEK will not land us in the next block, it is very likely better to issues a series
|
|
||||||
* of SKIPs.
|
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
private ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) {
|
||||||
protected ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) {
|
|
||||||
switch(qcode) {
|
switch(qcode) {
|
||||||
case INCLUDE_AND_SEEK_NEXT_COL:
|
case INCLUDE_AND_SEEK_NEXT_COL:
|
||||||
case SEEK_NEXT_COL:
|
case SEEK_NEXT_COL:
|
||||||
{
|
{
|
||||||
Cell nextIndexedKey = getNextIndexedKey();
|
Cell nextIndexedKey = getNextIndexedKey();
|
||||||
if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY
|
if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY
|
||||||
&& matcher.compareKeyForNextColumn(nextIndexedKey, cell) > 0) {
|
&& matcher.compareKeyForNextColumn(nextIndexedKey, cell) >= 0) {
|
||||||
return qcode == MatchCode.SEEK_NEXT_COL ? MatchCode.SKIP : MatchCode.INCLUDE;
|
return qcode == MatchCode.SEEK_NEXT_COL ? MatchCode.SKIP : MatchCode.INCLUDE;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -723,16 +668,10 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
||||||
case INCLUDE_AND_SEEK_NEXT_ROW:
|
case INCLUDE_AND_SEEK_NEXT_ROW:
|
||||||
case SEEK_NEXT_ROW:
|
case SEEK_NEXT_ROW:
|
||||||
{
|
{
|
||||||
// If it is a Get Scan, then we know that we are done with this row; there are no more
|
Cell nextIndexedKey = getNextIndexedKey();
|
||||||
// rows beyond the current one: don't try to optimize. We are DONE. Return the *_NEXT_ROW
|
if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY
|
||||||
// qcode as is. When the caller gets these flags on a Get Scan, it knows it can shut down the
|
&& matcher.compareKeyForNextRow(nextIndexedKey, cell) >= 0) {
|
||||||
// Scan.
|
return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : MatchCode.INCLUDE;
|
||||||
if (!this.scan.isGetScan()) {
|
|
||||||
Cell nextIndexedKey = getNextIndexedKey();
|
|
||||||
if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY
|
|
||||||
&& matcher.compareKeyForNextRow(nextIndexedKey, cell) > 0) {
|
|
||||||
return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : MatchCode.INCLUDE;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,8 @@ import org.apache.hadoop.hbase.CellComparator;
|
||||||
import org.apache.hadoop.hbase.regionserver.NonReversedNonLazyKeyValueScanner;
|
import org.apache.hadoop.hbase.regionserver.NonReversedNonLazyKeyValueScanner;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility scanner that wraps a sortable collection and serves as a KeyValueScanner.
|
* Utility scanner that wraps a sortable collection and serves
|
||||||
|
* as a KeyValueScanner.
|
||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public class CollectionBackedScanner extends NonReversedNonLazyKeyValueScanner {
|
public class CollectionBackedScanner extends NonReversedNonLazyKeyValueScanner {
|
||||||
|
|
|
@ -22,7 +22,6 @@ package org.apache.hadoop.hbase.regionserver;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.Cell;
|
|
||||||
import org.apache.hadoop.hbase.CellComparator;
|
import org.apache.hadoop.hbase.CellComparator;
|
||||||
import org.apache.hadoop.hbase.KeyValue;
|
import org.apache.hadoop.hbase.KeyValue;
|
||||||
import org.apache.hadoop.hbase.util.CollectionBackedScanner;
|
import org.apache.hadoop.hbase.util.CollectionBackedScanner;
|
||||||
|
@ -34,8 +33,9 @@ import org.apache.hadoop.hbase.util.CollectionBackedScanner;
|
||||||
* to be a store file scanner.
|
* to be a store file scanner.
|
||||||
*/
|
*/
|
||||||
public class KeyValueScanFixture extends CollectionBackedScanner {
|
public class KeyValueScanFixture extends CollectionBackedScanner {
|
||||||
public KeyValueScanFixture(CellComparator comparator, Cell... cells) {
|
public KeyValueScanFixture(CellComparator comparator,
|
||||||
super(comparator, cells);
|
KeyValue... incData) {
|
||||||
|
super(comparator, incData);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<KeyValueScanner> scanFixture(KeyValue[] ... kvArrays) {
|
public static List<KeyValueScanner> scanFixture(KeyValue[] ... kvArrays) {
|
||||||
|
|
|
@ -46,7 +46,8 @@ public class TestKeyValueScanFixture extends TestCase {
|
||||||
KeyValueTestUtil.create("RowB", "family", "qf1",
|
KeyValueTestUtil.create("RowB", "family", "qf1",
|
||||||
10, KeyValue.Type.Put, "value-10")
|
10, KeyValue.Type.Put, "value-10")
|
||||||
};
|
};
|
||||||
KeyValueScanner scan = new KeyValueScanFixture(CellComparator.COMPARATOR, kvs);
|
KeyValueScanner scan = new KeyValueScanFixture(
|
||||||
|
CellComparator.COMPARATOR, kvs);
|
||||||
|
|
||||||
KeyValue kv = KeyValueUtil.createFirstOnRow(Bytes.toBytes("RowA"));
|
KeyValue kv = KeyValueUtil.createFirstOnRow(Bytes.toBytes("RowA"));
|
||||||
// should seek to this:
|
// should seek to this:
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
package org.apache.hadoop.hbase.regionserver;
|
package org.apache.hadoop.hbase.regionserver;
|
||||||
|
|
||||||
import static org.apache.hadoop.hbase.regionserver.KeyValueScanFixture.scanFixture;
|
import static org.apache.hadoop.hbase.regionserver.KeyValueScanFixture.scanFixture;
|
||||||
import static org.junit.Assert.*;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -28,21 +27,16 @@ import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.NavigableSet;
|
import java.util.NavigableSet;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hbase.CategoryBasedTimeout;
|
import org.apache.hadoop.hbase.CategoryBasedTimeout;
|
||||||
import org.apache.hadoop.hbase.Cell;
|
import org.apache.hadoop.hbase.Cell;
|
||||||
import org.apache.hadoop.hbase.CellComparator;
|
import org.apache.hadoop.hbase.CellComparator;
|
||||||
import org.apache.hadoop.hbase.CellUtil;
|
|
||||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.KeepDeletedCells;
|
import org.apache.hadoop.hbase.KeepDeletedCells;
|
||||||
import org.apache.hadoop.hbase.KeyValue;
|
import org.apache.hadoop.hbase.KeyValue;
|
||||||
import org.apache.hadoop.hbase.KeyValueTestUtil;
|
import org.apache.hadoop.hbase.KeyValueTestUtil;
|
||||||
import org.apache.hadoop.hbase.client.Get;
|
|
||||||
import org.apache.hadoop.hbase.client.Scan;
|
import org.apache.hadoop.hbase.client.Scan;
|
||||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
|
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
|
||||||
|
@ -59,113 +53,16 @@ import org.junit.rules.TestRule;
|
||||||
// Can't be small as it plays with EnvironmentEdgeManager
|
// Can't be small as it plays with EnvironmentEdgeManager
|
||||||
@Category({RegionServerTests.class, MediumTests.class})
|
@Category({RegionServerTests.class, MediumTests.class})
|
||||||
public class TestStoreScanner {
|
public class TestStoreScanner {
|
||||||
private static final Log LOG = LogFactory.getLog(TestStoreScanner.class);
|
|
||||||
@Rule public TestName name = new TestName();
|
@Rule public TestName name = new TestName();
|
||||||
@Rule public final TestRule timeout = CategoryBasedTimeout.builder().withTimeout(this.getClass()).
|
@Rule public final TestRule timeout = CategoryBasedTimeout.builder().withTimeout(this.getClass()).
|
||||||
withLookingForStuckThread(true).build();
|
withLookingForStuckThread(true).build();
|
||||||
private static final String CF_STR = "cf";
|
private static final String CF_STR = "cf";
|
||||||
private static final byte [] CF = Bytes.toBytes(CF_STR);
|
final byte [] CF = Bytes.toBytes(CF_STR);
|
||||||
static Configuration CONF = HBaseConfiguration.create();
|
static Configuration CONF = HBaseConfiguration.create();
|
||||||
private ScanInfo scanInfo = new ScanInfo(CONF, CF, 0, Integer.MAX_VALUE,
|
private ScanInfo scanInfo = new ScanInfo(CONF, CF, 0, Integer.MAX_VALUE,
|
||||||
Long.MAX_VALUE, KeepDeletedCells.FALSE, 0, CellComparator.COMPARATOR);
|
Long.MAX_VALUE, KeepDeletedCells.FALSE, 0, CellComparator.COMPARATOR);
|
||||||
private ScanType scanType = ScanType.USER_SCAN;
|
private ScanType scanType = ScanType.USER_SCAN;
|
||||||
|
|
||||||
/**
|
|
||||||
* From here on down, we have a bunch of defines and specific CELL_GRID of Cells. The
|
|
||||||
* CELL_GRID then has a Scanner that can fake out 'block' transitions. All this elaborate
|
|
||||||
* setup is for tests that ensure we don't overread, and that the
|
|
||||||
* {@link StoreScanner#optimize(org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode,
|
|
||||||
* Cell)} is not overly enthusiastic.
|
|
||||||
*/
|
|
||||||
private static final byte [] ZERO = new byte [] {'0'};
|
|
||||||
private static final byte [] ZERO_POINT_ZERO = new byte [] {'0', '.', '0'};
|
|
||||||
private static final byte [] ONE = new byte [] {'1'};
|
|
||||||
private static final byte [] TWO = new byte [] {'2'};
|
|
||||||
private static final byte [] TWO_POINT_TWO = new byte [] {'2', '.', '2'};
|
|
||||||
private static final byte [] THREE = new byte [] {'3'};
|
|
||||||
private static final byte [] FOUR = new byte [] {'4'};
|
|
||||||
private static final byte [] FIVE = new byte [] {'5'};
|
|
||||||
private static final byte [] VALUE = new byte [] {'v'};
|
|
||||||
private static final int CELL_GRID_BLOCK2_BOUNDARY = 4;
|
|
||||||
private static final int CELL_GRID_BLOCK3_BOUNDARY = 11;
|
|
||||||
private static final int CELL_GRID_BLOCK4_BOUNDARY = 15;
|
|
||||||
private static final int CELL_GRID_BLOCK5_BOUNDARY = 19;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Five rows by four columns distinguished by column qualifier (column qualifier is one of the
|
|
||||||
* four rows... ONE, TWO, etc.). Exceptions are a weird row after TWO; it is TWO_POINT_TWO.
|
|
||||||
* And then row FOUR has five columns finishing w/ row FIVE having a single column.
|
|
||||||
* We will use this to test scan does the right thing as it
|
|
||||||
* we do Gets, StoreScanner#optimize, and what we do on (faked) block boundaries.
|
|
||||||
*/
|
|
||||||
private static final Cell [] CELL_GRID = new Cell [] {
|
|
||||||
CellUtil.createCell(ONE, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(ONE, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(ONE, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(ONE, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
// Offset 4 CELL_GRID_BLOCK2_BOUNDARY
|
|
||||||
CellUtil.createCell(TWO, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(TWO, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(TWO, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(TWO, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(TWO_POINT_TWO, CF, ZERO, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(TWO_POINT_TWO, CF, ZERO_POINT_ZERO, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(TWO_POINT_TWO, CF, FIVE, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
// Offset 11! CELL_GRID_BLOCK3_BOUNDARY
|
|
||||||
CellUtil.createCell(THREE, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(THREE, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(THREE, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(THREE, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
// Offset 15 CELL_GRID_BLOCK4_BOUNDARY
|
|
||||||
CellUtil.createCell(FOUR, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(FOUR, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(FOUR, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(FOUR, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
// Offset 19 CELL_GRID_BLOCK5_BOUNDARY
|
|
||||||
CellUtil.createCell(FOUR, CF, FIVE, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
CellUtil.createCell(FIVE, CF, ZERO, 1L, KeyValue.Type.Put.getCode(), VALUE),
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A StoreScanner for our CELL_GRID above. Fakes the block transitions. Does counts of
|
|
||||||
* calls to optimize and counts of when optimize actually did an optimize.
|
|
||||||
*/
|
|
||||||
private static class CellGridStoreScanner extends StoreScanner {
|
|
||||||
// Count of how often optimize is called and of how often it does an optimize.
|
|
||||||
final AtomicInteger count = new AtomicInteger(0);
|
|
||||||
final AtomicInteger optimization = new AtomicInteger(0);
|
|
||||||
|
|
||||||
CellGridStoreScanner(final Scan scan, ScanInfo scanInfo, ScanType scanType)
|
|
||||||
throws IOException {
|
|
||||||
super(scan, scanInfo, scanType, scan.getFamilyMap().get(CF),
|
|
||||||
Arrays.<KeyValueScanner>asList(
|
|
||||||
new KeyValueScanner[] {new KeyValueScanFixture(CellComparator.COMPARATOR, CELL_GRID)}));
|
|
||||||
}
|
|
||||||
|
|
||||||
protected ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) {
|
|
||||||
count.incrementAndGet();
|
|
||||||
ScanQueryMatcher.MatchCode after = super.optimize(qcode, cell);
|
|
||||||
LOG.info("Cell=" + cell + ", nextIndex=" + CellUtil.toString(getNextIndexedKey(), false) +
|
|
||||||
", before=" + qcode + ", after=" + after);
|
|
||||||
if (qcode != after) {
|
|
||||||
optimization.incrementAndGet();
|
|
||||||
}
|
|
||||||
return after;
|
|
||||||
};
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Cell getNextIndexedKey() {
|
|
||||||
// Fake block boundaries by having index of next block change as we go through scan.
|
|
||||||
return count.get() > CELL_GRID_BLOCK4_BOUNDARY?
|
|
||||||
CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK5_BOUNDARY]):
|
|
||||||
count.get() > CELL_GRID_BLOCK3_BOUNDARY?
|
|
||||||
CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK4_BOUNDARY]):
|
|
||||||
count.get() > CELL_GRID_BLOCK2_BOUNDARY?
|
|
||||||
CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK3_BOUNDARY]):
|
|
||||||
CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK2_BOUNDARY]);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Test utility for building a NavigableSet for scanners.
|
* Test utility for building a NavigableSet for scanners.
|
||||||
* @param strCols
|
* @param strCols
|
||||||
|
@ -180,145 +77,6 @@ public class TestStoreScanner {
|
||||||
return cols;
|
return cols;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFullRowGetDoesNotOverreadWhenRowInsideOneBlock() throws IOException {
|
|
||||||
// Do a Get against row two. Row two is inside a block that starts with row TWO but ends with
|
|
||||||
// row TWO_POINT_TWO. We should read one block only.
|
|
||||||
Get get = new Get(TWO);
|
|
||||||
Scan scan = new Scan(get);
|
|
||||||
CellGridStoreScanner scanner = new CellGridStoreScanner(scan, this.scanInfo, this.scanType);
|
|
||||||
try {
|
|
||||||
List<Cell> results = new ArrayList<Cell>();
|
|
||||||
while (scanner.next(results)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Should be four results of column 1 (though there are 5 rows in the CELL_GRID -- the
|
|
||||||
// TWO_POINT_TWO row does not have a a column ONE.
|
|
||||||
Assert.assertEquals(4, results.size());
|
|
||||||
// We should have gone the optimize route 5 times totally... an INCLUDE for the four cells
|
|
||||||
// in the row plus the DONE on the end.
|
|
||||||
Assert.assertEquals(5, scanner.count.get());
|
|
||||||
// For a full row Get, there should be no opportunity for scanner optimization.
|
|
||||||
Assert.assertEquals(0, scanner.optimization.get());
|
|
||||||
} finally {
|
|
||||||
scanner.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFullRowSpansBlocks() throws IOException {
|
|
||||||
// Do a Get against row FOUR. It spans two blocks.
|
|
||||||
Get get = new Get(FOUR);
|
|
||||||
Scan scan = new Scan(get);
|
|
||||||
CellGridStoreScanner scanner = new CellGridStoreScanner(scan, this.scanInfo, this.scanType);
|
|
||||||
try {
|
|
||||||
List<Cell> results = new ArrayList<Cell>();
|
|
||||||
while (scanner.next(results)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Should be four results of column 1 (though there are 5 rows in the CELL_GRID -- the
|
|
||||||
// TWO_POINT_TWO row does not have a a column ONE.
|
|
||||||
Assert.assertEquals(5, results.size());
|
|
||||||
// We should have gone the optimize route 6 times totally... an INCLUDE for the five cells
|
|
||||||
// in the row plus the DONE on the end.
|
|
||||||
Assert.assertEquals(6, scanner.count.get());
|
|
||||||
// For a full row Get, there should be no opportunity for scanner optimization.
|
|
||||||
Assert.assertEquals(0, scanner.optimization.get());
|
|
||||||
} finally {
|
|
||||||
scanner.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test optimize in StoreScanner. Test that we skip to the next 'block' when we it makes sense
|
|
||||||
* reading the block 'index'.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testOptimize() throws IOException {
|
|
||||||
Scan scan = new Scan();
|
|
||||||
// A scan that just gets the first qualifier on each row of the CELL_GRID
|
|
||||||
scan.addColumn(CF, ONE);
|
|
||||||
CellGridStoreScanner scanner = new CellGridStoreScanner(scan, this.scanInfo, this.scanType);
|
|
||||||
try {
|
|
||||||
List<Cell> results = new ArrayList<Cell>();
|
|
||||||
while (scanner.next(results)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Should be four results of column 1 (though there are 5 rows in the CELL_GRID -- the
|
|
||||||
// TWO_POINT_TWO row does not have a a column ONE.
|
|
||||||
Assert.assertEquals(4, results.size());
|
|
||||||
for (Cell cell: results) {
|
|
||||||
assertTrue(Bytes.equals(ONE, 0, ONE.length,
|
|
||||||
cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()));
|
|
||||||
}
|
|
||||||
Assert.assertTrue("Optimize should do some optimizations", scanner.optimization.get() > 0);
|
|
||||||
} finally {
|
|
||||||
scanner.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Ensure the optimize Scan method in StoreScanner does not get in the way of a Get doing minimum
|
|
||||||
* work... seeking to start of block and then SKIPPING until we find the wanted Cell.
|
|
||||||
* This 'simple' scenario mimics case of all Cells fitting inside a single HFileBlock.
|
|
||||||
* See HBASE-15392. This test is a little cryptic. Takes a bit of staring to figure what it up to.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testOptimizeAndGet() throws IOException {
|
|
||||||
// First test a Get of two columns in the row R2. Every Get is a Scan. Get columns named
|
|
||||||
// R2 and R3.
|
|
||||||
Get get = new Get(TWO);
|
|
||||||
get.addColumn(CF, TWO);
|
|
||||||
get.addColumn(CF, THREE);
|
|
||||||
Scan scan = new Scan(get);
|
|
||||||
CellGridStoreScanner scanner = new CellGridStoreScanner(scan, this.scanInfo, this.scanType);
|
|
||||||
try {
|
|
||||||
List<Cell> results = new ArrayList<Cell>();
|
|
||||||
// For a Get there should be no more next's after the first call.
|
|
||||||
Assert.assertEquals(false, scanner.next(results));
|
|
||||||
// Should be one result only.
|
|
||||||
Assert.assertEquals(2, results.size());
|
|
||||||
// And we should have gone through optimize twice only.
|
|
||||||
Assert.assertEquals("First qcode is SEEK_NEXT_COL and second INCLUDE_AND_SEEK_NEXT_ROW",
|
|
||||||
3, scanner.count.get());
|
|
||||||
} finally {
|
|
||||||
scanner.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Ensure that optimize does not cause the Get to do more seeking than required. Optimize
|
|
||||||
* (see HBASE-15392) was causing us to seek all Cells in a block when a Get Scan if the next block
|
|
||||||
* index/start key was a different row to the current one. A bug. We'd call next too often
|
|
||||||
* because we had to exhaust all Cells in the current row making us load the next block just to
|
|
||||||
* discard what we read there. This test is a little cryptic. Takes a bit of staring to figure
|
|
||||||
* what it up to.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testOptimizeAndGetWithFakedNextBlockIndexStart() throws IOException {
|
|
||||||
// First test a Get of second column in the row R2. Every Get is a Scan. Second column has a
|
|
||||||
// qualifier of R2.
|
|
||||||
Get get = new Get(THREE);
|
|
||||||
get.addColumn(CF, TWO);
|
|
||||||
Scan scan = new Scan(get);
|
|
||||||
CellGridStoreScanner scanner = new CellGridStoreScanner(scan, this.scanInfo, this.scanType);
|
|
||||||
try {
|
|
||||||
List<Cell> results = new ArrayList<Cell>();
|
|
||||||
// For a Get there should be no more next's after the first call.
|
|
||||||
Assert.assertEquals(false, scanner.next(results));
|
|
||||||
// Should be one result only.
|
|
||||||
Assert.assertEquals(1, results.size());
|
|
||||||
// And we should have gone through optimize twice only.
|
|
||||||
Assert.assertEquals("First qcode is SEEK_NEXT_COL and second INCLUDE_AND_SEEK_NEXT_ROW",
|
|
||||||
2, scanner.count.get());
|
|
||||||
} finally {
|
|
||||||
scanner.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testScanTimeRange() throws IOException {
|
public void testScanTimeRange() throws IOException {
|
||||||
String r1 = "R1";
|
String r1 = "R1";
|
||||||
|
|
Loading…
Reference in New Issue