HBASE-8063 Filter HFiles based on first/last key (Liang Xie)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1455971 13f79535-47bb-0310-9956-ffa450edef68
parent 6f97d4b20d
commit afcfbd1a8a
StoreFile.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValue.KVComparator;
 import org.apache.hadoop.hbase.KeyValue.MetaKeyComparator;
+import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.Reference;
 import org.apache.hadoop.hbase.io.compress.Compression;
@@ -1401,6 +1402,28 @@ public class StoreFile {
       return true;
     }
 
+    /**
+     * Checks whether the given scan rowkey range overlaps with the current storefile's key range.
+     * @param scan the scan specification. Used to determine the rowkey range.
+     * @return true if there is overlap, false otherwise
+     */
+    boolean passesKeyRangeFilter(Scan scan) {
+      if (this.getFirstKey() == null || this.getLastKey() == null) {
+        // the file is empty
+        return false;
+      }
+      if (Bytes.equals(scan.getStartRow(), HConstants.EMPTY_START_ROW)
+          && Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
+        return true;
+      }
+      KeyValue startKeyValue = KeyValue.createFirstOnRow(scan.getStartRow());
+      KeyValue stopKeyValue = KeyValue.createLastOnRow(scan.getStopRow());
+      boolean nonOverLapping = (getComparator().compare(this.getFirstKey(),
+          stopKeyValue.getKey()) > 0 && !Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW))
+          || getComparator().compare(this.getLastKey(), startKeyValue.getKey()) < 0;
+      return !nonOverLapping;
+    }
+
     public Map<byte[], byte[]> loadFileInfo() throws IOException {
       Map<byte [], byte []> fi = reader.loadFileInfo();
 
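The predicate added above is the classic interval-overlap test: the file is skipped only when its [firstKey, lastKey] range lies entirely after the scan's stop row or entirely before its start row, with empty start/stop rows treated as unbounded. A minimal standalone sketch of the same check on bare row keys (hypothetical class and helper names, plain Java, no HBase types):

public class KeyRangeOverlapSketch {
  // Lexicographic unsigned byte[] comparison, equivalent to HBase's Bytes.compareTo.
  static int compare(byte[] a, byte[] b) {
    int n = Math.min(a.length, b.length);
    for (int i = 0; i < n; i++) {
      int d = (a[i] & 0xff) - (b[i] & 0xff);
      if (d != 0) return d;
    }
    return a.length - b.length;
  }

  // True if [fileFirst, fileLast] intersects [scanStart, scanStop];
  // a zero-length stop row means "scan to the end of the table".
  static boolean overlaps(byte[] fileFirst, byte[] fileLast, byte[] scanStart, byte[] scanStop) {
    boolean nonOverlapping = (scanStop.length > 0 && compare(fileFirst, scanStop) > 0) // file starts after the scan ends
        || compare(fileLast, scanStart) < 0; // file ends before the scan starts
    return !nonOverlapping;
  }

  public static void main(String[] args) {
    byte[] first = "row0".getBytes(), last = "row7".getBytes();
    System.out.println(overlaps(first, last, "aaa".getBytes(), "aaz".getBytes())); // false: disjoint
    System.out.println(overlaps(first, last, "row5".getBytes(), "row9".getBytes())); // true: intersects
  }
}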
StoreFileScanner.java
@@ -369,9 +369,8 @@ public class StoreFileScanner implements KeyValueScanner {
   }
 
   @Override
-  public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns,
-      long oldestUnexpiredTS) {
-    return reader.passesTimerangeFilter(scan, oldestUnexpiredTS) &&
-        reader.passesBloomFilter(scan, columns);
+  public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns, long oldestUnexpiredTS) {
+    return reader.passesTimerangeFilter(scan, oldestUnexpiredTS)
+        && reader.passesKeyRangeFilter(scan) && reader.passesBloomFilter(scan, columns);
   }
 }
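shouldUseScanner() is consulted while scanners are being chosen for a scan, so a store file whose key range cannot intersect the scan is now excluded before any of its blocks are read. A hedged sketch of how a caller might apply it (hypothetical class and method names; in HBase the equivalent selection happens inside StoreScanner):

import java.util.ArrayList;
import java.util.List;
import java.util.SortedSet;

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;

public class ScannerSelectionSketch {
  // Keep only the scanners whose underlying files can contain cells matching the scan;
  // the timerange, key-range, and Bloom-filter checks all short-circuit inside shouldUseScanner.
  static List<KeyValueScanner> select(List<KeyValueScanner> allScanners, Scan scan,
      SortedSet<byte[]> columns, long oldestUnexpiredTS) {
    List<KeyValueScanner> kept = new ArrayList<KeyValueScanner>();
    for (KeyValueScanner s : allScanners) {
      if (s.shouldUseScanner(scan, columns, oldestUnexpiredTS)) {
        kept.add(s);
      }
    }
    return kept;
  }
}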
TestScannerSelectionUsingKeyRange.java (new file)
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test the optimization that does not scan files where all key ranges are excluded.
+ */
+@RunWith(Parameterized.class)
+@Category(SmallTests.class)
+public class TestScannerSelectionUsingKeyRange {
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static String TABLE = "myTable";
+  private static String FAMILY = "myCF";
+  private static byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
+  private static final int NUM_ROWS = 8;
+  private static final int NUM_COLS_PER_ROW = 5;
+  private static final int NUM_FILES = 2;
+  private static final Map<Object, Integer> TYPE_COUNT = new HashMap<Object, Integer>(3);
+  static {
+    TYPE_COUNT.put(BloomType.ROWCOL, 0);
+    TYPE_COUNT.put(BloomType.ROW, 0);
+    TYPE_COUNT.put(BloomType.NONE, 0);
+  }
+
+  private BloomType bloomType;
+  private int expectedCount;
+
+  @Parameters
+  public static Collection<Object[]> parameters() {
+    List<Object[]> params = new ArrayList<Object[]>();
+    for (Object type : TYPE_COUNT.keySet()) {
+      params.add(new Object[] { type, TYPE_COUNT.get(type) });
+    }
+    return params;
+  }
+
+  public TestScannerSelectionUsingKeyRange(Object type, Object count) {
+    bloomType = (BloomType) type;
+    expectedCount = (Integer) count;
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.cleanupTestDir();
+  }
+
+  @Test
+  public void testScannerSelection() throws IOException {
+    Configuration conf = TEST_UTIL.getConfiguration();
+    conf.setInt("hbase.hstore.compactionThreshold", 10000);
+    HColumnDescriptor hcd = new HColumnDescriptor(FAMILY_BYTES).setBlockCacheEnabled(true)
+        .setBloomFilterType(bloomType);
+    HTableDescriptor htd = new HTableDescriptor(TABLE);
+    htd.addFamily(hcd);
+    HRegionInfo info = new HRegionInfo(Bytes.toBytes(TABLE));
+    HRegion region = HRegion.createHRegion(info, TEST_UTIL.getDataTestDir(), conf, htd);
+
+    for (int iFile = 0; iFile < NUM_FILES; ++iFile) {
+      for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
+        Put put = new Put(Bytes.toBytes("row" + iRow));
+        for (int iCol = 0; iCol < NUM_COLS_PER_ROW; ++iCol) {
+          put.add(FAMILY_BYTES, Bytes.toBytes("col" + iCol),
+              Bytes.toBytes("value" + iFile + "_" + iRow + "_" + iCol));
+        }
+        region.put(put);
+      }
+      region.flushcache();
+    }
+
+    Scan scan = new Scan(Bytes.toBytes("aaa"), Bytes.toBytes("aaz"));
+    CacheConfig cacheConf = new CacheConfig(conf);
+    LruBlockCache cache = (LruBlockCache) cacheConf.getBlockCache();
+    cache.clearCache();
+    InternalScanner scanner = region.getScanner(scan);
+    List<KeyValue> results = new ArrayList<KeyValue>();
+    while (scanner.next(results)) {
+    }
+    scanner.close();
+    assertEquals(0, results.size());
+    Set<String> accessedFiles = cache.getCachedFileNamesForTest();
+    assertEquals(expectedCount, accessedFiles.size());
+    region.close();
+  }
+}
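The test writes two store files covering rows row0..row7, then scans the disjoint range [aaa, aaz) and asserts that no results come back and no file blocks end up in the block cache. To run just this test with a standard Maven/Surefire invocation (assuming it is launched from the module containing the test):

mvn test -Dtest=TestScannerSelectionUsingKeyRange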