HBASE-3684 Support column range filter
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1087593 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
22e5b51c7b
commit
d8d4fc8b7c
|
@ -119,6 +119,7 @@ Release 0.91.0 - Unreleased
|
|||
HBASE-3705 Allow passing timestamp into importtsv (Andy Sautins via Stack)
|
||||
HBASE-3715 Book.xml - adding architecture section on client, adding section
|
||||
on spec-ex under mapreduce (Doug Meil via Stack)
|
||||
HBASE-3684 Support column range filter (Jerry Chen via Stack)
|
||||
|
||||
TASK
|
||||
HBASE-3559 Move report of split to master OFF the heartbeat channel
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* Copyright 2010 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.io.DataInput;
|
||||
|
||||
/**
 * This filter is used for selecting only those keys with columns that are
 * between minColumn to maxColumn. For example, if minColumn is 'an', and
 * maxColumn is 'be', it will pass keys with columns like 'ana', 'bad', but not
 * keys with columns like 'bed', 'eye'
 *
 * If minColumn is null, there is no lower bound. If maxColumn is null, there is
 * no upper bound.
 *
 * minColumnInclusive and maxColumnInclusive specify if the ranges are inclusive
 * or not.
 */
public class ColumnRangeFilter extends FilterBase {
  // Lower bound on the column qualifier; null means no lower bound.
  protected byte[] minColumn = null;
  protected boolean minColumnInclusive = true;
  // Upper bound on the column qualifier; null means no upper bound.
  protected byte[] maxColumn = null;
  protected boolean maxColumnInclusive = false;

  /** Writable constructor: fields are populated later via readFields(). */
  public ColumnRangeFilter() {
    super();
  }

  /**
   * Create a filter to select those keys with columns that are between minColumn
   * and maxColumn.
   * @param minColumn minimum value for the column range. If it's null,
   * there is no lower bound.
   * @param minColumnInclusive if true, include minColumn in the range.
   * @param maxColumn maximum value for the column range. If it's null,
   * there is no upper bound.
   * @param maxColumnInclusive if true, include maxColumn in the range.
   */
  public ColumnRangeFilter(final byte[] minColumn, boolean minColumnInclusive,
      final byte[] maxColumn, boolean maxColumnInclusive) {
    this.minColumn = minColumn;
    this.minColumnInclusive = minColumnInclusive;
    this.maxColumn = maxColumn;
    this.maxColumnInclusive = maxColumnInclusive;
  }

  /**
   * @return if min column range is inclusive.
   */
  public boolean isMinColumnInclusive() {
    return minColumnInclusive;
  }

  /**
   * @return if max column range is inclusive.
   */
  public boolean isMaxColumnInclusive() {
    return maxColumnInclusive;
  }

  /**
   * @return the min column range for the filter
   */
  public byte[] getMinColumn() {
    return this.minColumn;
  }

  /**
   * @return the max column range for the filter
   */
  public byte[] getMaxColumn() {
    return this.maxColumn;
  }

  /**
   * Compares the KeyValue's qualifier bytes against [minColumn, maxColumn].
   * Qualifiers below the range produce a seek hint to minColumn (see
   * getNextKeyHint); qualifiers above the range end the row.
   */
  @Override
  public ReturnCode filterKeyValue(KeyValue kv) {
    byte[] buffer = kv.getBuffer();
    int qualifierOffset = kv.getQualifierOffset();
    int qualifierLength = kv.getQualifierLength();
    // Default of 1 means "past the (absent) lower bound" when minColumn is null.
    int cmpMin = 1;

    if (this.minColumn != null) {
      cmpMin = Bytes.compareTo(buffer, qualifierOffset, qualifierLength,
          this.minColumn, 0, this.minColumn.length);
    }

    if (cmpMin < 0) {
      // Qualifier sorts before minColumn: seek directly to minColumn instead
      // of skipping one KeyValue at a time.
      return ReturnCode.SEEK_NEXT_USING_HINT;
    }

    if (!this.minColumnInclusive && cmpMin == 0) {
      // Exactly at an exclusive lower bound: this one is out, but a later
      // qualifier in the same row may still match.
      return ReturnCode.SKIP;
    }

    if (this.maxColumn == null) {
      return ReturnCode.INCLUDE;
    }

    int cmpMax = Bytes.compareTo(buffer, qualifierOffset, qualifierLength,
        this.maxColumn, 0, this.maxColumn.length);

    if (this.maxColumnInclusive && cmpMax <= 0 ||
        !this.maxColumnInclusive && cmpMax < 0) {
      return ReturnCode.INCLUDE;
    }

    // Qualifier sorts after maxColumn; since qualifiers are sorted within a
    // row, nothing later in this row can match either.
    return ReturnCode.NEXT_ROW;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    // need to write out a flag for null value separately. Otherwise,
    // we will not be able to differentiate empty string and null
    out.writeBoolean(this.minColumn == null);
    Bytes.writeByteArray(out, this.minColumn);
    out.writeBoolean(this.minColumnInclusive);

    out.writeBoolean(this.maxColumn == null);
    Bytes.writeByteArray(out, this.maxColumn);
    out.writeBoolean(this.maxColumnInclusive);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    // Mirror of write(): a boolean null-flag precedes each byte array so an
    // empty array can be told apart from null.
    boolean isMinColumnNull = in.readBoolean();
    this.minColumn = Bytes.readByteArray(in);

    if (isMinColumnNull) {
      this.minColumn = null;
    }

    this.minColumnInclusive = in.readBoolean();

    boolean isMaxColumnNull = in.readBoolean();
    this.maxColumn = Bytes.readByteArray(in);
    if (isMaxColumnNull) {
      this.maxColumn = null;
    }
    this.maxColumnInclusive = in.readBoolean();
  }

  /**
   * Hint paired with ReturnCode.SEEK_NEXT_USING_HINT: the first possible
   * KeyValue for this row/family at qualifier minColumn.
   */
  @Override
  public KeyValue getNextKeyHint(KeyValue kv) {
    return KeyValue.createFirstOnRow(kv.getBuffer(), kv.getRowOffset(), kv
        .getRowLength(), kv.getBuffer(), kv.getFamilyOffset(), kv
        .getFamilyLength(), this.minColumn, 0, this.minColumn == null ? 0
        : this.minColumn.length);
  }
}
|
|
@ -64,6 +64,7 @@ import org.apache.hadoop.hbase.filter.BinaryComparator;
|
|||
import org.apache.hadoop.hbase.filter.BitComparator;
|
||||
import org.apache.hadoop.hbase.filter.ColumnCountGetFilter;
|
||||
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
|
||||
import org.apache.hadoop.hbase.filter.ColumnRangeFilter;
|
||||
import org.apache.hadoop.hbase.filter.CompareFilter;
|
||||
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
|
||||
import org.apache.hadoop.hbase.filter.DependentColumnFilter;
|
||||
|
@ -222,6 +223,8 @@ public class HbaseObjectWritable implements Writable, WritableWithSize, Configur
|
|||
addToMap(RandomRowFilter.class, code++);
|
||||
|
||||
addToMap(CompareOp.class, code++);
|
||||
|
||||
addToMap(ColumnRangeFilter.class, code++);
|
||||
}
|
||||
|
||||
private Class<?> declaredClass;
|
||||
|
|
|
@ -0,0 +1,242 @@
|
|||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HColumnDescriptor;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValueTestUtil;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.InternalScanner;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.junit.Test;
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
 * A string interval with independently inclusive/exclusive bounds, used by the
 * test to model the expected qualifier range of a ColumnRangeFilter. A null
 * start means no lower bound; a null end means no upper bound.
 */
class StringRange {
  private String start = null;
  private String end = null;
  private boolean startInclusive = true;
  private boolean endInclusive = false;

  public StringRange(String start, boolean startInclusive, String end,
      boolean endInclusive) {
    this.start = start;
    this.startInclusive = startInclusive;
    this.end = end;
    this.endInclusive = endInclusive;
  }

  public String getStart() {
    return this.start;
  }

  public String getEnd() {
    return this.end;
  }

  public boolean isStartInclusive() {
    return this.startInclusive;
  }

  public boolean isEndInclusive() {
    return this.endInclusive;
  }

  @Override
  public int hashCode() {
    // Only the bounds participate; objects equal under equals() share the
    // same bounds, so the hashCode/equals contract holds.
    int hashCode = 0;
    if (this.start != null) {
      hashCode ^= this.start.hashCode();
    }

    if (this.end != null) {
      hashCode ^= this.end.hashCode();
    }
    return hashCode;
  }

  /**
   * Fix: hashCode() was overridden without equals(), breaking value equality
   * for instances used as HashMap keys. Two ranges are equal when both bounds
   * and both inclusiveness flags match.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof StringRange)) {
      return false;
    }
    StringRange other = (StringRange) obj;
    return this.startInclusive == other.startInclusive
        && this.endInclusive == other.endInclusive
        && (this.start == null ? other.start == null
            : this.start.equals(other.start))
        && (this.end == null ? other.end == null
            : this.end.equals(other.end));
  }

  @Override
  public String toString() {
    // Interval notation, e.g. "[a, b)"; a null bound prints as "null".
    String result = (this.startInclusive ? "[" : "(")
        + (this.start == null ? null : this.start) + ", "
        + (this.end == null ? null : this.end)
        + (this.endInclusive ? "]" : ")");
    return result;
  }

  /**
   * @return true if value lies within this range, honoring the
   * inclusive/exclusive flags; a null bound imposes no constraint.
   */
  public boolean inRange(String value) {
    boolean afterStart = true;
    if (this.start != null) {
      int startCmp = value.compareTo(this.start);
      afterStart = this.startInclusive ? startCmp >= 0 : startCmp > 0;
    }

    boolean beforeEnd = true;
    if (this.end != null) {
      int endCmp = value.compareTo(this.end);
      beforeEnd = this.endInclusive ? endCmp <= 0 : endCmp < 0;
    }

    return afterStart && beforeEnd;
  }
}
|
||||
|
||||
public class TestColumnRangeFilter {
|
||||
|
||||
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||
|
||||
private final Log LOG = LogFactory.getLog(this.getClass());
|
||||
|
||||
/**
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
TEST_UTIL.startMiniCluster(3);
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@AfterClass
|
||||
public static void tearDownAfterClass() throws Exception {
|
||||
TEST_UTIL.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestColumnRangeFilterClient() throws Exception {
|
||||
String family = "Family";
|
||||
String table = "TestColumnRangeFilterClient";
|
||||
HTable ht = TEST_UTIL.createTable(Bytes.toBytes(table),
|
||||
Bytes.toBytes(family), Integer.MAX_VALUE);
|
||||
|
||||
List<String> rows = generateRandomWords(10, 8);
|
||||
long maxTimestamp = 2;
|
||||
List<String> columns = generateRandomWords(20000, 8);
|
||||
|
||||
List<KeyValue> kvList = new ArrayList<KeyValue>();
|
||||
|
||||
Map<StringRange, List<KeyValue>> rangeMap = new HashMap<StringRange, List<KeyValue>>();
|
||||
|
||||
rangeMap.put(new StringRange(null, true, "b", false),
|
||||
new ArrayList<KeyValue>());
|
||||
rangeMap.put(new StringRange("p", true, "q", false),
|
||||
new ArrayList<KeyValue>());
|
||||
rangeMap.put(new StringRange("r", false, "s", true),
|
||||
new ArrayList<KeyValue>());
|
||||
rangeMap.put(new StringRange("z", false, null, false),
|
||||
new ArrayList<KeyValue>());
|
||||
String valueString = "ValueString";
|
||||
|
||||
for (String row : rows) {
|
||||
Put p = new Put(Bytes.toBytes(row));
|
||||
for (String column : columns) {
|
||||
for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
|
||||
KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp,
|
||||
valueString);
|
||||
p.add(kv);
|
||||
kvList.add(kv);
|
||||
for (StringRange s : rangeMap.keySet()) {
|
||||
if (s.inRange(column)) {
|
||||
rangeMap.get(s).add(kv);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ht.put(p);
|
||||
}
|
||||
|
||||
TEST_UTIL.flush();
|
||||
try {
|
||||
Thread.sleep(3000);
|
||||
} catch (InterruptedException i) {
|
||||
// ignore
|
||||
}
|
||||
|
||||
ColumnRangeFilter filter;
|
||||
Scan scan = new Scan();
|
||||
scan.setMaxVersions();
|
||||
for (StringRange s : rangeMap.keySet()) {
|
||||
filter = new ColumnRangeFilter(s.getStart() == null ? null
|
||||
: Bytes.toBytes(s.getStart()), s.isStartInclusive(),
|
||||
s.getEnd() == null ? null : Bytes.toBytes(s.getEnd()),
|
||||
s.isEndInclusive());
|
||||
scan.setFilter(filter);
|
||||
ResultScanner scanner = ht.getScanner(scan);
|
||||
List<KeyValue> results = new ArrayList<KeyValue>();
|
||||
LOG.info("scan column range: " + s.toString());
|
||||
long timeBeforeScan = System.currentTimeMillis();
|
||||
|
||||
Result result;
|
||||
while ((result = scanner.next()) != null) {
|
||||
for (KeyValue kv : result.list()) {
|
||||
results.add(kv);
|
||||
}
|
||||
}
|
||||
long scanTime = System.currentTimeMillis() - timeBeforeScan;
|
||||
LOG.info("scan time = " + scanTime + "ms");
|
||||
LOG.info("found " + results.size() + " results");
|
||||
LOG.info("Expecting " + rangeMap.get(s).size() + " results");
|
||||
|
||||
/*
|
||||
for (KeyValue kv : results) {
|
||||
LOG.info("found row " + Bytes.toString(kv.getRow()) + ", column "
|
||||
+ Bytes.toString(kv.getQualifier()));
|
||||
}
|
||||
*/
|
||||
|
||||
assertEquals(rangeMap.get(s).size(), results.size());
|
||||
}
|
||||
}
|
||||
|
||||
List<String> generateRandomWords(int numberOfWords, int maxLengthOfWords) {
|
||||
Set<String> wordSet = new HashSet<String>();
|
||||
for (int i = 0; i < numberOfWords; i++) {
|
||||
int lengthOfWords = (int) (Math.random() * maxLengthOfWords) + 1;
|
||||
char[] wordChar = new char[lengthOfWords];
|
||||
for (int j = 0; j < wordChar.length; j++) {
|
||||
wordChar[j] = (char) (Math.random() * 26 + 97);
|
||||
}
|
||||
String word = new String(wordChar);
|
||||
wordSet.add(word);
|
||||
}
|
||||
List<String> wordList = new ArrayList<String>(wordSet);
|
||||
return wordList;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue