HBASE-3684 Support column range filter

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1087593 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-04-01 06:00:37 +00:00
parent 22e5b51c7b
commit d8d4fc8b7c
4 changed files with 418 additions and 0 deletions

View File

@ -119,6 +119,7 @@ Release 0.91.0 - Unreleased
HBASE-3705 Allow passing timestamp into importtsv (Andy Sautins via Stack)
HBASE-3715 Book.xml - adding architecture section on client, adding section
on spec-ex under mapreduce (Doug Meil via Stack)
HBASE-3684 Support column range filter (Jerry Chen via Stack)
TASK
HBASE-3559 Move report of split to master OFF the heartbeat channel

View File

@ -0,0 +1,172 @@
/*
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.DataOutput;
import java.io.IOException;
import java.io.DataInput;
/**
* This filter is used for selecting only those keys with columns that are
* between minColumn to maxColumn. For example, if minColumn is 'an', and
* maxColumn is 'be', it will pass keys with columns like 'ana', 'bad', but not
* keys with columns like 'bed', 'eye'
*
* If minColumn is null, there is no lower bound. If maxColumn is null, there is
* no upper bound.
*
* minColumnInclusive and maxColumnInclusive specify if the ranges are inclusive
* or not.
*/
public class ColumnRangeFilter extends FilterBase {
  protected byte[] minColumn = null;
  protected boolean minColumnInclusive = true;
  protected byte[] maxColumn = null;
  protected boolean maxColumnInclusive = false;

  /**
   * Writable constructor: do not use directly. State is populated later
   * via {@link #readFields(DataInput)}.
   */
  public ColumnRangeFilter() {
    super();
  }

  /**
   * Create a filter to select those keys with columns that are between
   * minColumn and maxColumn.
   *
   * @param minColumn minimum value for the column range. If it's null,
   * there is no lower bound.
   * @param minColumnInclusive if true, include minColumn in the range.
   * @param maxColumn maximum value for the column range. If it's null,
   * there is no upper bound.
   * @param maxColumnInclusive if true, include maxColumn in the range.
   */
  public ColumnRangeFilter(final byte[] minColumn, boolean minColumnInclusive,
      final byte[] maxColumn, boolean maxColumnInclusive) {
    this.minColumn = minColumn;
    this.minColumnInclusive = minColumnInclusive;
    this.maxColumn = maxColumn;
    this.maxColumnInclusive = maxColumnInclusive;
  }

  /**
   * @return if min column range is inclusive.
   */
  public boolean isMinColumnInclusive() {
    return minColumnInclusive;
  }

  /**
   * @return if max column range is inclusive.
   */
  public boolean isMaxColumnInclusive() {
    return maxColumnInclusive;
  }

  /**
   * @return the min column range for the filter
   */
  public byte[] getMinColumn() {
    return this.minColumn;
  }

  /**
   * @return the max column range for the filter
   */
  public byte[] getMaxColumn() {
    return this.maxColumn;
  }

  @Override
  public ReturnCode filterKeyValue(KeyValue kv) {
    byte[] buffer = kv.getBuffer();
    int qualifierOffset = kv.getQualifierOffset();
    int qualifierLength = kv.getQualifierLength();
    // Default cmpMin to 1 ("past the lower bound") so a null minColumn
    // means no lower bound at all.
    int cmpMin = 1;

    if (this.minColumn != null) {
      cmpMin = Bytes.compareTo(buffer, qualifierOffset, qualifierLength,
          this.minColumn, 0, this.minColumn.length);
    }

    if (cmpMin < 0) {
      // Qualifier sorts before minColumn: jump straight to the first
      // possible match instead of skipping one KeyValue at a time.
      return ReturnCode.SEEK_NEXT_USING_HINT;
    }

    if (!this.minColumnInclusive && cmpMin == 0) {
      // Exactly on an exclusive lower bound; skip just this column.
      return ReturnCode.SKIP;
    }

    if (this.maxColumn == null) {
      // No upper bound: everything at or past the lower bound matches.
      return ReturnCode.INCLUDE;
    }

    int cmpMax = Bytes.compareTo(buffer, qualifierOffset, qualifierLength,
        this.maxColumn, 0, this.maxColumn.length);

    if (this.maxColumnInclusive && cmpMax <= 0 ||
        !this.maxColumnInclusive && cmpMax < 0) {
      return ReturnCode.INCLUDE;
    }

    // Qualifier is past the upper bound; columns are sorted within a row,
    // so nothing else in this row can match.
    return ReturnCode.NEXT_ROW;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    // We need to write out a flag for a null value separately. Otherwise,
    // we would not be able to differentiate an empty array from null on
    // the read side.
    out.writeBoolean(this.minColumn == null);
    Bytes.writeByteArray(out, this.minColumn);
    out.writeBoolean(this.minColumnInclusive);

    out.writeBoolean(this.maxColumn == null);
    Bytes.writeByteArray(out, this.maxColumn);
    out.writeBoolean(this.maxColumnInclusive);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    // Mirror of write(): a boolean null-flag precedes each byte array.
    boolean isMinColumnNull = in.readBoolean();
    this.minColumn = Bytes.readByteArray(in);
    if (isMinColumnNull) {
      this.minColumn = null;
    }
    this.minColumnInclusive = in.readBoolean();

    boolean isMaxColumnNull = in.readBoolean();
    this.maxColumn = Bytes.readByteArray(in);
    if (isMaxColumnNull) {
      this.maxColumn = null;
    }
    this.maxColumnInclusive = in.readBoolean();
  }

  @Override
  public KeyValue getNextKeyHint(KeyValue kv) {
    // Seek hint used by SEEK_NEXT_USING_HINT: the first possible cell on
    // this row/family at minColumn (or the first column if minColumn is
    // null, in which case a zero-length qualifier is used).
    return KeyValue.createFirstOnRow(kv.getBuffer(), kv.getRowOffset(), kv
        .getRowLength(), kv.getBuffer(), kv.getFamilyOffset(), kv
        .getFamilyLength(), this.minColumn, 0, this.minColumn == null ? 0
        : this.minColumn.length);
  }
}

View File

@ -64,6 +64,7 @@ import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.BitComparator;
import org.apache.hadoop.hbase.filter.ColumnCountGetFilter;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.ColumnRangeFilter;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.DependentColumnFilter;
@ -222,6 +223,8 @@ public class HbaseObjectWritable implements Writable, WritableWithSize, Configur
addToMap(RandomRowFilter.class, code++);
addToMap(CompareOp.class, code++);
addToMap(ColumnRangeFilter.class, code++);
}
private Class<?> declaredClass;

View File

@ -0,0 +1,242 @@
package org.apache.hadoop.hbase.filter;
import static org.junit.Assert.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
/**
 * A simple [start, end) style string interval with configurable inclusivity
 * at either bound; a null bound means "unbounded" on that side. Used as a
 * HashMap key by the tests, so equals/hashCode are both overridden and
 * consistent with each other.
 */
class StringRange {
  private String start = null;
  private String end = null;
  private boolean startInclusive = true;
  private boolean endInclusive = false;

  public StringRange(String start, boolean startInclusive, String end,
      boolean endInclusive) {
    this.start = start;
    this.startInclusive = startInclusive;
    this.end = end;
    this.endInclusive = endInclusive;
  }

  public String getStart() {
    return this.start;
  }

  public String getEnd() {
    return this.end;
  }

  public boolean isStartInclusive() {
    return this.startInclusive;
  }

  public boolean isEndInclusive() {
    return this.endInclusive;
  }

  /** Null-safe string equality helper (no java.util.Objects dependency). */
  private static boolean eq(String a, String b) {
    return a == null ? b == null : a.equals(b);
  }

  @Override
  public int hashCode() {
    // Mix in the inclusivity flags so e.g. [a, b) and (a, b] hash apart;
    // stays consistent with equals() below.
    int hashCode = (this.startInclusive ? 1 : 0) ^ (this.endInclusive ? 2 : 0);
    if (this.start != null) {
      hashCode ^= this.start.hashCode();
    }
    if (this.end != null) {
      hashCode ^= this.end.hashCode();
    }
    return hashCode;
  }

  @Override
  public boolean equals(Object obj) {
    // Required companion to hashCode(): HashMap keys must honor the
    // equals/hashCode contract.
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof StringRange)) {
      return false;
    }
    StringRange other = (StringRange) obj;
    return this.startInclusive == other.startInclusive
        && this.endInclusive == other.endInclusive
        && eq(this.start, other.start)
        && eq(this.end, other.end);
  }

  @Override
  public String toString() {
    // Standard interval notation, e.g. "[an, be)"; a null bound prints as
    // the literal string "null".
    return (this.startInclusive ? "[" : "(") + this.start + ", "
        + this.end + (this.endInclusive ? "]" : ")");
  }

  /**
   * @return true if {@code value} lies within this range, honoring the
   * inclusivity flags; a null bound imposes no constraint on that side.
   */
  public boolean inRange(String value) {
    boolean afterStart = true;
    if (this.start != null) {
      int startCmp = value.compareTo(this.start);
      afterStart = this.startInclusive ? startCmp >= 0 : startCmp > 0;
    }

    boolean beforeEnd = true;
    if (this.end != null) {
      int endCmp = value.compareTo(this.end);
      beforeEnd = this.endInclusive ? endCmp <= 0 : endCmp < 0;
    }

    return afterStart && beforeEnd;
  }
}
/**
 * End-to-end test of {@link ColumnRangeFilter} against a mini cluster:
 * loads rows with many random column qualifiers, scans with several column
 * ranges, and checks the returned cell counts against an independently
 * computed expectation ({@link StringRange#inRange(String)}).
 */
public class TestColumnRangeFilter {

  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  private final Log LOG = LogFactory.getLog(this.getClass());

  /**
   * @throws java.lang.Exception
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster(3);
  }

  /**
   * @throws java.lang.Exception
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * @throws java.lang.Exception
   */
  @Before
  public void setUp() throws Exception {
    // Nothing to do.
  }

  /**
   * @throws java.lang.Exception
   */
  @After
  public void tearDown() throws Exception {
    // Nothing to do.
  }

  @Test
  public void TestColumnRangeFilterClient() throws Exception {
    String family = "Family";
    String table = "TestColumnRangeFilterClient";
    HTable ht = TEST_UTIL.createTable(Bytes.toBytes(table),
        Bytes.toBytes(family), Integer.MAX_VALUE);

    List<String> rows = generateRandomWords(10, 8);
    long maxTimestamp = 2;
    List<String> columns = generateRandomWords(20000, 8);

    List<KeyValue> kvList = new ArrayList<KeyValue>();

    // Ranges under test; each maps to the cells we expect the filter to
    // return for it. Covers an open lower bound, two interior ranges with
    // mixed inclusivity, and an open upper bound.
    Map<StringRange, List<KeyValue>> rangeMap = new HashMap<StringRange, List<KeyValue>>();

    rangeMap.put(new StringRange(null, true, "b", false),
        new ArrayList<KeyValue>());
    rangeMap.put(new StringRange("p", true, "q", false),
        new ArrayList<KeyValue>());
    rangeMap.put(new StringRange("r", false, "s", true),
        new ArrayList<KeyValue>());
    rangeMap.put(new StringRange("z", false, null, false),
        new ArrayList<KeyValue>());

    String valueString = "ValueString";

    // Load the table and, in the same pass, record for each range which
    // cells it should match (the client-side oracle).
    for (String row : rows) {
      Put p = new Put(Bytes.toBytes(row));
      for (String column : columns) {
        for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
          KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp,
              valueString);
          p.add(kv);
          kvList.add(kv);
          for (StringRange s : rangeMap.keySet()) {
            if (s.inRange(column)) {
              rangeMap.get(s).add(kv);
            }
          }
        }
      }
      ht.put(p);
    }

    TEST_UTIL.flush();
    try {
      // Crude wait for the flush to settle across the mini cluster.
      Thread.sleep(3000);
    } catch (InterruptedException i) {
      // ignore
    }

    ColumnRangeFilter filter;
    Scan scan = new Scan();
    scan.setMaxVersions();
    for (StringRange s : rangeMap.keySet()) {
      filter = new ColumnRangeFilter(s.getStart() == null ? null
          : Bytes.toBytes(s.getStart()), s.isStartInclusive(),
          s.getEnd() == null ? null : Bytes.toBytes(s.getEnd()),
          s.isEndInclusive());
      scan.setFilter(filter);
      ResultScanner scanner = ht.getScanner(scan);
      List<KeyValue> results = new ArrayList<KeyValue>();
      LOG.info("scan column range: " + s.toString());
      long timeBeforeScan = System.currentTimeMillis();

      // Always close the scanner: it holds server-side resources.
      try {
        Result result;
        while ((result = scanner.next()) != null) {
          for (KeyValue kv : result.list()) {
            results.add(kv);
          }
        }
      } finally {
        scanner.close();
      }
      long scanTime = System.currentTimeMillis() - timeBeforeScan;
      LOG.info("scan time = " + scanTime + "ms");
      LOG.info("found " + results.size() + " results");
      LOG.info("Expecting " + rangeMap.get(s).size() + " results");

      assertEquals(rangeMap.get(s).size(), results.size());
    }
  }

  /**
   * Generate up to {@code numberOfWords} distinct random lowercase words of
   * length 1..maxLengthOfWords. May return fewer than requested because
   * duplicates are collapsed through a Set.
   */
  List<String> generateRandomWords(int numberOfWords, int maxLengthOfWords) {
    Set<String> wordSet = new HashSet<String>();
    for (int i = 0; i < numberOfWords; i++) {
      int lengthOfWords = (int) (Math.random() * maxLengthOfWords) + 1;
      char[] wordChar = new char[lengthOfWords];
      for (int j = 0; j < wordChar.length; j++) {
        // Random letter in 'a'..'z'.
        wordChar[j] = (char) (Math.random() * 26 + 97);
      }
      String word = new String(wordChar);
      wordSet.add(word);
    }
    List<String> wordList = new ArrayList<String>(wordSet);
    return wordList;
  }
}