HBASE-22969 A new binary component comparator(BinaryComponentComparator) to perform comparison of arbitrary length and position (#829)

Signed-off-by: Balazs Meszaros <meszibalu@apache.org>
This commit is contained in:
Udai Bhan Kashyap 2019-11-18 07:28:06 -05:00 committed by Balazs Meszaros
parent baf8849050
commit ab63bde013
6 changed files with 497 additions and 0 deletions

View File

@ -0,0 +1,125 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import com.google.protobuf.ByteString;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
/**
 * A comparator which compares against a specified byte array, but only
 * compares a specific portion (a "component") of the byte array it is given.
 * The component starts {@code offset} bytes into the supplied data and is as long as
 * this comparator's own value. For the rest it is similar to {@link BinaryComparator}.
 * <p>
 * The result of {@code compareTo} is the comparison of this comparator's value
 * against the component of the supplied data: it is positive when this comparator's
 * value sorts after the component, negative when it sorts before, and zero on a match.
 */
@InterfaceAudience.Public
@SuppressWarnings("ComparableType")
public class BinaryComponentComparator extends ByteArrayComparable {
  /** Offset of the component from the beginning of the data being compared. */
  private final int offset;

  /**
   * Constructor
   *
   * @param value value of the component
   * @param offset offset of the component from the beginning of the data being compared
   */
  public BinaryComponentComparator(byte[] value, int offset) {
    super(value);
    this.offset = offset;
  }

  /**
   * Compares this comparator's value against the component of {@code value},
   * treating the whole array as the data.
   *
   * @param value the data to look the component up in
   * @return the result of {@link #compareTo(byte[], int, int)} over the whole array
   */
  @Override
  public int compareTo(byte[] value) {
    return compareTo(value, 0, value.length);
  }

  /**
   * Compares this comparator's value against the component found in the region
   * {@code [offset, offset + length)} of {@code value}.
   * <p>
   * Only bytes inside that region are examined. If the region ends before the component
   * does, the comparison is made against the bytes that are available; when those bytes
   * match, this comparator's (longer) value is reported as the greater one.
   *
   * @param value array holding the data
   * @param offset start of the data inside {@code value}
   * @param length number of bytes of data available starting at {@code offset}
   * @return a negative, zero or positive number as this comparator's value is less than,
   *         equal to or greater than the component
   */
  @Override
  public int compareTo(byte[] value, int offset, int length) {
    // Clamp to the caller-supplied region so bytes that merely follow the data in the
    // same backing array are never read.
    int available = Math.max(0, length - this.offset);
    int comparedLength = Math.min(this.value.length, available);
    return Bytes.compareTo(this.value, 0, this.value.length, value, offset + this.offset,
      comparedLength);
  }

  /**
   * Two comparators are equal when they have the same offset and the same value bytes.
   */
  @Override
  public boolean equals(Object other) {
    if (other == this) {
      return true;
    }
    if (!(other instanceof BinaryComponentComparator)) {
      return false;
    }
    BinaryComponentComparator that = (BinaryComponentComparator) other;
    // Compare the value arrays directly; compareTo() would apply this.offset to the
    // other comparator's value, which is not an equality check.
    return offset == that.offset && Bytes.equals(this.value, that.value);
  }

  /**
   * Hash code derived from the same fields {@link #equals(Object)} compares.
   */
  @Override
  public int hashCode() {
    int result = this.value == null ? 0 : Bytes.hashCode(this.value);
    result = 31 * result + offset;
    return result;
  }

  /**
   * @return The comparator serialized using pb
   */
  @Override
  public byte[] toByteArray() {
    ComparatorProtos.BinaryComponentComparator.Builder builder =
      ComparatorProtos.BinaryComponentComparator.newBuilder();
    builder.setValue(ByteString.copyFrom(this.value));
    builder.setOffset(this.offset);
    return builder.build().toByteArray();
  }

  /**
   * @param pbBytes A pb serialized {@link BinaryComponentComparator} instance
   * @return An instance of {@link BinaryComponentComparator} made from <code>bytes</code>
   * @throws DeserializationException if {@code pbBytes} cannot be parsed as a
   *         serialized {@link BinaryComponentComparator}
   * @see #toByteArray
   */
  public static BinaryComponentComparator parseFrom(final byte[] pbBytes)
      throws DeserializationException {
    ComparatorProtos.BinaryComponentComparator proto;
    try {
      proto = ComparatorProtos.BinaryComponentComparator.parseFrom(pbBytes);
    } catch (InvalidProtocolBufferException e) {
      throw new DeserializationException(e);
    }
    return new BinaryComponentComparator(proto.getValue().toByteArray(), proto.getOffset());
  }

  /**
   * @param other parameter to compare against
   * @return true if and only if the fields of the comparator that are
   *         serialized (value and offset) are equal to the corresponding fields in other.
   *         Used for testing.
   */
  @Override
  boolean areSerializedFieldsEqual(ByteArrayComparable other) {
    if (other == this) {
      return true;
    }
    if (!(other instanceof BinaryComponentComparator)) {
      return false;
    }
    // offset is written by toByteArray(), so it has to take part in this check too;
    // the remaining comparison is delegated to the superclass.
    return offset == ((BinaryComponentComparator) other).offset
      && super.areSerializedFieldsEqual(other);
  }
}

View File

@ -104,6 +104,59 @@ public class TestComparators {
assertTrue(PrivateCellUtil.qualifierStartsWith(kv, q1));
assertFalse(PrivateCellUtil.qualifierStartsWith(kv, q2));
assertFalse(PrivateCellUtil.qualifierStartsWith(kv, Bytes.toBytes("longerthanthequalifier")));
//Binary component comparisons
byte[] val = Bytes.toBytes("abcd");
kv = new KeyValue(r0, f, q1, val);
buffer = ByteBuffer.wrap(kv.getBuffer());
bbCell = new ByteBufferKeyValue(buffer, 0, buffer.remaining());
//equality check
//row comparison
//row is "row0"(set by variable r0)
//and we are checking for equality to 'o' at position 1
//'r' is at position 0.
byte[] component = Bytes.toBytes("o");
comparable = new BinaryComponentComparator(component, 1);
assertEquals(0, PrivateCellUtil.compareRow(bbCell, comparable));
assertEquals(0, PrivateCellUtil.compareRow(kv, comparable));
//value comparison
//value is "abcd"(set by variable val).
//and we are checking for equality to 'c' at position 2.
//'a' is at position 0.
component = Bytes.toBytes("c");
comparable = new BinaryComponentComparator(component, 2);
assertEquals(0,PrivateCellUtil.compareValue(bbCell, comparable));
assertEquals(0,PrivateCellUtil.compareValue(kv, comparable));
//greater than
component = Bytes.toBytes("z");
//checking for greater than at position 1.
//for both row("row0") and value("abcd")
//'z' > 'r'
comparable = new BinaryComponentComparator(component, 1);
//row comparison
assertTrue(PrivateCellUtil.compareRow(bbCell, comparable) > 0);
assertTrue(PrivateCellUtil.compareRow(kv, comparable) > 0);
//value comparison
//'z' > 'a'
assertTrue(PrivateCellUtil.compareValue(bbCell, comparable) > 0);
assertTrue(PrivateCellUtil.compareValue(kv, comparable) > 0);
//less than
component = Bytes.toBytes("a");
//checking for less than at position 1 for row ("row0")
comparable = new BinaryComponentComparator(component, 1);
//row comparison
//'a' < 'r'
assertTrue(PrivateCellUtil.compareRow(bbCell, comparable) < 0);
assertTrue(PrivateCellUtil.compareRow(kv, comparable) < 0);
//value comparison
//checking for less than at position 2 for value("abcd")
//'a' < 'c'
comparable = new BinaryComponentComparator(component, 2);
assertTrue(PrivateCellUtil.compareValue(bbCell, comparable) < 0);
assertTrue(PrivateCellUtil.compareValue(kv, comparable) < 0);
}
}

View File

@ -77,3 +77,8 @@ message SubstringComparator {
// Message with a single required field carrying a ByteArrayComparable payload.
message BigDecimalComparator {
required ByteArrayComparable comparable = 1;
}
// Serialized form of a BinaryComponentComparator: the component bytes plus the
// position at which the component is expected.
message BinaryComponentComparator {
// Bytes of the component to compare against.
required bytes value = 1;
// Offset of the component from the beginning of the compared data (unsigned on the wire).
required uint32 offset = 2;
}

View File

@ -76,3 +76,8 @@ message SubstringComparator {
// Message with a single required field carrying a ByteArrayComparable payload.
message BigDecimalComparator {
required ByteArrayComparable comparable = 1;
}
// Serialized form of a BinaryComponentComparator: the component bytes plus the
// position at which the component is expected.
message BinaryComponentComparator {
// Bytes of the component to compare against.
required bytes value = 1;
// Offset of the component from the beginning of the compared data (unsigned on the wire).
required uint32 offset = 2;
}

View File

@ -0,0 +1,293 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Tests {@link RowFilter} and {@link ValueFilter} when they are built on top of
 * {@link BinaryComponentComparator}.
 * <p>
 * See https://issues.apache.org/jira/browse/HBASE-22969 - for need of BinaryComponentComparator.
 * The description on jira should also help you in understanding tests implemented in this class.
 */
@Category(MediumTests.class)
public class TestFiltersWithBinaryComponentComparator {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestFiltersWithBinaryComponentComparator.class);

  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private static final Logger LOG =
    LoggerFactory.getLogger(TestFiltersWithBinaryComponentComparator.class);

  private final byte[] family = Bytes.toBytes("family");
  private final byte[] qf = Bytes.toBytes("qf");
  private TableName tableName;

  // The row key is four 4-byte ints laid out as "a+b+c+d"; these are their byte offsets.
  private final int aOffset = 0;
  private final int bOffset = 4;
  private final int cOffset = 8;
  private final int dOffset = 12;

  @Rule
  public TestName name = new TestName();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * SELECT * from table where a=1 and b > 10 and b < 20 and c > 90 and c < 100 and d=1
   */
  @Test
  public void testRowFilterWithBinaryComponentComparator() throws IOException {
    tableName = TableName.valueOf(name.getMethodName());
    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
      generateRows(ht, family, qf);
      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      setRowFilters(filterList);
      Scan scan = createScan(filterList);
      List<Cell> result = getResults(ht, scan);
      // Without this the per-cell assertions below would pass on an empty result.
      assertTrue("scan returned no cells", !result.isEmpty());
      for (Cell cell : result) {
        assertRowKeyMatches(cell);
      }
    }
  }

  /**
   * SELECT * from table where value has 'y' at position 1
   */
  @Test
  public void testValueFilterWithBinaryComponentComparator() throws IOException {
    tableName = TableName.valueOf(name.getMethodName());
    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
      generateRows(ht, family, qf);
      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      setValueFilters(filterList);
      Scan scan = new Scan();
      scan.setFilter(filterList);
      List<Cell> result = getResults(ht, scan);
      // Without this the per-cell assertions below would pass on an empty result.
      assertTrue("scan returned no cells", !result.isEmpty());
      for (Cell cell : result) {
        assertValueMatches(cell);
      }
    }
  }

  /**
   * SELECT * from table where a=1 and b > 10 and b < 20 and c > 90 and c < 100 and d=1
   * and value has 'y' at position 1
   */
  @Test
  public void testRowAndValueFilterWithBinaryComponentComparator() throws IOException {
    tableName = TableName.valueOf(name.getMethodName());
    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
      generateRows(ht, family, qf);
      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
      setRowFilters(filterList);
      setValueFilters(filterList);
      Scan scan = new Scan();
      scan.setFilter(filterList);
      List<Cell> result = getResults(ht, scan);
      // Without this the per-cell assertions below would pass on an empty result.
      assertTrue("scan returned no cells", !result.isEmpty());
      for (Cell cell : result) {
        assertRowKeyMatches(cell);
        assertValueMatches(cell);
      }
    }
  }

  /**
   * Asserts that the row key of the cell satisfies
   * a = 1 and b > 10 and b < 20 and c > 90 and c < 100 and d = 1.
   */
  private void assertRowKeyMatches(Cell cell) {
    byte[] key = CellUtil.cloneRow(cell);
    int a = Bytes.readAsInt(key, aOffset, 4);
    int b = Bytes.readAsInt(key, bOffset, 4);
    int c = Bytes.readAsInt(key, cOffset, 4);
    int d = Bytes.readAsInt(key, dOffset, 4);
    assertTrue("unexpected row key: a=" + a + ", b=" + b + ", c=" + c + ", d=" + d,
      a == 1 &&
      b > 10 &&
      b < 20 &&
      c > 90 &&
      c < 100 &&
      d == 1);
  }

  /**
   * Asserts that the value of the cell has 'y' at position 1.
   */
  private void assertValueMatches(Cell cell) {
    byte[] value = CellUtil.cloneValue(cell);
    assertTrue(Bytes.toString(value).charAt(1) == 'y');
  }

  /**
   * Since we are trying to emulate
   * SQL: SELECT * from table where a = 1 and b > 10 and b < 20 and
   * c > 90 and c < 100 and d = 1
   * We are generating rows with:
   * a = 1, b >=9 and b < 22, c >= 89 and c < 102, and d = 1
   * At the end the table will look something like this:
   * ------------
   *  a| b|  c|d|
   * ------------
   *  1| 9| 89|1|family:qf|xyz|
   *  -----------
   *  1| 9| 90|1|family:qf|abc|
   *  -----------
   *  1| 9| 91|1|family:qf|xyz|
   *  -------------------------
   *  .
   *  -------------------------
   *  .
   *  -------------------------
   *  1|21|101|1|family:qf|xyz|
   */
  private void generateRows(Table ht, byte[] family, byte[] qf)
      throws IOException {
    List<Put> puts = new ArrayList<>();
    for (int a = 1; a < 2; ++a) {
      for (int b = 9; b < 22; ++b) {
        for (int c = 89; c < 102; ++c) {
          for (int d = 1; d < 2; ++d) {
            byte[] key = new byte[16];
            Bytes.putInt(key, 0, a);
            Bytes.putInt(key, 4, b);
            Bytes.putInt(key, 8, c);
            Bytes.putInt(key, 12, d);
            // Rows with an even c carry "abc", the others carry "xyz".
            String cellValue = (c % 2 == 0) ? "abc" : "xyz";
            Put row = new Put(key);
            row.addColumn(family, qf, Bytes.toBytes(cellValue));
            puts.add(row);
            LOG.info("added row: {} with value '{}'", Hex.encodeHexString(key), cellValue);
          }
        }
      }
    }
    // Actually write the rows; otherwise every scan in this class comes back empty.
    ht.put(puts);
    TEST_UTIL.flush();
  }

  /**
   * Adds row filters for b > 10, b < 20, c > 90, c < 100 and d == 1 to the list.
   * Each filter looks only at its own 4-byte component of the "a+b+c+d" row key.
   */
  private void setRowFilters(FilterList filterList) {
    //b is the second component of "a+b+c+d"
    byte[] b10 = Bytes.toBytes(10); //tests b > 10
    Filter b10Filter = new RowFilter(CompareOperator.GREATER,
      new BinaryComponentComparator(b10, bOffset));
    filterList.addFilter(b10Filter);

    byte[] b20 = Bytes.toBytes(20); //tests b < 20
    Filter b20Filter = new RowFilter(CompareOperator.LESS,
      new BinaryComponentComparator(b20, bOffset));
    filterList.addFilter(b20Filter);

    //c is the third component of "a+b+c+d"
    byte[] c90 = Bytes.toBytes(90); //tests c > 90
    Filter c90Filter = new RowFilter(CompareOperator.GREATER,
      new BinaryComponentComparator(c90, cOffset));
    filterList.addFilter(c90Filter);

    byte[] c100 = Bytes.toBytes(100); //tests c < 100
    Filter c100Filter = new RowFilter(CompareOperator.LESS,
      new BinaryComponentComparator(c100, cOffset));
    filterList.addFilter(c100Filter);

    //d is the fourth component of "a+b+c+d"
    byte[] d1 = Bytes.toBytes(1); //tests d == 1
    Filter dFilter = new RowFilter(CompareOperator.EQUAL,
      new BinaryComponentComparator(d1, dOffset));
    filterList.addFilter(dFilter);
  }

  /**
   * We have rows with either "abc" or "xyz".
   * We want values which have 'y' at second position
   * of the string.
   * As a result only values with "xyz" shall be returned
   */
  private void setValueFilters(FilterList filterList) {
    int offset = 1;
    byte[] y = Bytes.toBytes("y");
    Filter yFilter = new ValueFilter(CompareOperator.EQUAL,
      new BinaryComponentComparator(y, offset));
    filterList.addFilter(yFilter);
  }

  /**
   * Builds a scan bounded by start and stop rows, with the given filter list attached.
   */
  private Scan createScan(FilterList list) {
    //build start and end key for scan
    byte[] startKey = new byte[16];          //key size with four ints
    Bytes.putInt(startKey, aOffset, 1);      //a=1, takes care of a = 1
    Bytes.putInt(startKey, bOffset, 11);     //b=11, takes care of b > 10
    Bytes.putInt(startKey, cOffset, 91);     //c=91,
    Bytes.putInt(startKey, dOffset, 1);      //d=1,

    byte[] endKey = new byte[16];
    Bytes.putInt(endKey, aOffset, 1);        //a=1, takes care of a = 1
    Bytes.putInt(endKey, bOffset, 20);       //b=20, takes care of b < 20
    Bytes.putInt(endKey, cOffset, 100);      //c=100,
    Bytes.putInt(endKey, dOffset, 1);        //d=1,

    //setup scan
    Scan scan = new Scan().withStartRow(startKey).withStopRow(endKey);
    scan.setFilter(list);
    return scan;
  }

  /**
   * Runs the scan against the table and collects every returned cell.
   */
  private List<Cell> getResults(Table ht, Scan scan) throws IOException {
    List<Cell> results = new ArrayList<>();
    try (ResultScanner scanner = ht.getScanner(scan)) {
      Result r;
      while ((r = scanner.next()) != null) {
        for (Cell kv : r.listCells()) {
          results.add(kv);
        }
      }
    }
    return results;
  }
}

View File

@ -410,6 +410,20 @@ See link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryP
See link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComparator.html[BinaryComparator].
[[client.filter.cvp.bcc]]
==== BinaryComponentComparator
link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComponentComparator.html[BinaryComponentComparator] can be used to compare a specific value at a specific location within the cell value. The comparison can be done for both ASCII and binary data.
[source,java]
----
byte[] partialValue = Bytes.toBytes("partial_value");
int partialValueOffset = 4; // example value: zero-based byte offset of the component within the cell value
Filter partialValueFilter = new ValueFilter(CompareFilter.CompareOp.GREATER,
new BinaryComponentComparator(partialValue,partialValueOffset));
----
See link:https://issues.apache.org/jira/browse/HBASE-22969[HBASE-22969] for other use cases and details.
[[client.filter.kvm]]
=== KeyValue Metadata
@ -535,6 +549,8 @@ Note: Introduced in HBase 0.92
It is generally a better idea to use the startRow/stopRow methods on Scan for row selection, however link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/RowFilter.html[RowFilter] can also be used.
You can supplement a scan (both bounded and unbounded) with a RowFilter constructed from link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComponentComparator.html[BinaryComponentComparator] to further filter rows in or out. See link:https://issues.apache.org/jira/browse/HBASE-22969[HBASE-22969] for use cases and other details.
[[client.filter.utility]]
=== Utility