diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/BinaryComponentComparator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/BinaryComponentComparator.java new file mode 100644 index 00000000000..f78f79e419f --- /dev/null +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/BinaryComponentComparator.java @@ -0,0 +1,125 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.filter; + +import com.google.protobuf.ByteString; +import com.google.protobuf.InvalidProtocolBufferException; +import org.apache.hadoop.hbase.exceptions.DeserializationException; +import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * A comparator which compares against a specified byte array, but only + * compares specific portion of the byte array. For the rest it is similar to + * {@link BinaryComparator}. + */ +@InterfaceAudience.Public +@SuppressWarnings("ComparableType") +public class BinaryComponentComparator extends ByteArrayComparable { + private int offset; //offset of component from beginning. 
+
+  /**
+   * Creates a comparator for a component that sits at a fixed offset inside the compared bytes.
+   *
+   * @param value value of the component
+   * @param offset offset of the component from beginning
+   */
+  public BinaryComponentComparator(byte[] value, int offset) {
+    super(value);
+    this.offset = offset;
+  }
+
+  /**
+   * Compares the component against the whole of {@code value}, starting at the component offset.
+   *
+   * @param value array in which the component is looked up
+   * @return the result of {@link #compareTo(byte[], int, int)} over the full array
+   */
+  @Override
+  public int compareTo(byte[] value) {
+    return compareTo(value, 0, value.length);
+  }
+
+  /**
+   * Compares the component against the region {@code [offset, offset + length)} of
+   * {@code value}, starting {@code this.offset} bytes into that region.
+   *
+   * @param value array holding the region to compare against
+   * @param offset start of the region inside {@code value}
+   * @param length number of bytes in the region
+   * @return the result of {@code Bytes.compareTo} with this comparator's component as the
+   *         left-hand side
+   */
+  @Override
+  public int compareTo(byte[] value, int offset, int length) {
+    // The region may be shorter than this.offset + component length. Clamp the right-hand
+    // length so the comparison never reads bytes outside the region handed to us; when the
+    // component fits, the clamped length equals the component length, as before.
+    int available = Math.max(0, Math.min(this.value.length, length - this.offset));
+    return Bytes.compareTo(this.value, 0, this.value.length, value, offset + this.offset,
+      available);
+  }
+
+  /**
+   * Two comparators are equal when they have the same offset and the same component bytes.
+   */
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    if (!(other instanceof BinaryComponentComparator)) {
+      return false;
+    }
+    BinaryComponentComparator bcc = (BinaryComponentComparator) other;
+    // Compare the component arrays directly. Going through compareTo(bcc.value) would apply
+    // this.offset to the other comparator's component and look at the wrong bytes.
+    return offset == bcc.offset && Bytes.equals(this.value, bcc.value);
+  }
+
+  /**
+   * Hashes exactly the fields that {@link #equals(Object)} compares, so equal comparators
+   * always produce equal hash codes regardless of what the superclass hashCode does.
+   */
+  @Override
+  public int hashCode() {
+    int result = Bytes.hashCode(this.value);
+    result = 31 * result + offset;
+    return result;
+  }
+
+  /**
+   * @return The comparator serialized using pb
+   */
+  @Override
+  public byte[] toByteArray() {
+    ComparatorProtos.BinaryComponentComparator.Builder builder =
+      ComparatorProtos.BinaryComponentComparator.newBuilder();
+    builder.setValue(ByteString.copyFrom(this.value));
+    builder.setOffset(this.offset);
+    return builder.build().toByteArray();
+  }
+
+  /**
+   * @param pbBytes A pb serialized {@link BinaryComponentComparator} instance
+   * @return An instance of {@link BinaryComponentComparator} made from <code>bytes</code>
+   * @throws DeserializationException if {@code pbBytes} is not a valid serialized comparator
+   * @see #toByteArray
+   */
+  public static BinaryComponentComparator parseFrom(final byte[] pbBytes)
+    throws DeserializationException {
+    ComparatorProtos.BinaryComponentComparator proto;
+    try {
+      proto = ComparatorProtos.BinaryComponentComparator.parseFrom(pbBytes);
+    } catch (InvalidProtocolBufferException e) {
+      throw new DeserializationException(e);
+    }
+    return new BinaryComponentComparator(proto.getValue().toByteArray(), proto.getOffset());
+  }
+
+  /**
+   * @param other parameter to compare against
+   * @return true if and only if the fields of the comparator that are
+   *         serialized are equal to the corresponding fields in other. Used for testing.
+   */
+  @Override
+  boolean areSerializedFieldsEqual(ByteArrayComparable other) {
+    if (other == this) {
+      return true;
+    }
+    if (!(other instanceof BinaryComponentComparator)) {
+      return false;
+    }
+    // toByteArray() writes both the value and the offset, so both must match here.
+    BinaryComponentComparator that = (BinaryComponentComparator) other;
+    return this.offset == that.offset && super.areSerializedFieldsEqual(other);
+  }
+}
diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java
index 3835948b930..868f3b7fda4 100644
--- a/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java
+++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java
@@ -104,6 +104,59 @@ public class TestComparators {
     assertTrue(PrivateCellUtil.qualifierStartsWith(kv, q1));
     assertFalse(PrivateCellUtil.qualifierStartsWith(kv, q2));
     assertFalse(PrivateCellUtil.qualifierStartsWith(kv, Bytes.toBytes("longerthanthequalifier")));
+
+    //Binary component comparisons
+    byte[] val = Bytes.toBytes("abcd");
+    kv = new KeyValue(r0, f, q1, val);
+    buffer = ByteBuffer.wrap(kv.getBuffer());
+    bbCell = new ByteBufferKeyValue(buffer, 0, buffer.remaining());
+
+    //equality check
+    //row comparison
+    //row is "row0"(set by variable r0)
+    //and we are checking for equality to 'o' at position 1
+    //'r' is at position 0.
+    byte[] component = Bytes.toBytes("o");
+    comparable = new BinaryComponentComparator(component, 1);
+    assertEquals(0, PrivateCellUtil.compareRow(bbCell, comparable));
+    assertEquals(0, PrivateCellUtil.compareRow(kv, comparable));
+    //value comparison
+    //value is "abcd"(set by variable val).
+    //and we are checking for equality to 'c' at position 2.
+    //'a' is at position 0.
+    component = Bytes.toBytes("c");
+    comparable = new BinaryComponentComparator(component, 2);
+    assertEquals(0,PrivateCellUtil.compareValue(bbCell, comparable));
+    assertEquals(0,PrivateCellUtil.compareValue(kv, comparable));
+
+    //greater than
+    component = Bytes.toBytes("z");
+    //checking for greater than at position 1.
+    //for both row("row0") and value("abcd")
+    //'z' > 'o' ('o' is at position 1 of "row0")
+    comparable = new BinaryComponentComparator(component, 1);
+    //row comparison
+    assertTrue(PrivateCellUtil.compareRow(bbCell, comparable) > 0);
+    assertTrue(PrivateCellUtil.compareRow(kv, comparable) > 0);
+    //value comparison
+    //'z' > 'b' ('b' is at position 1 of "abcd")
+    assertTrue(PrivateCellUtil.compareValue(bbCell, comparable) > 0);
+    assertTrue(PrivateCellUtil.compareValue(kv, comparable) > 0);
+
+    //less than
+    component = Bytes.toBytes("a");
+    //checking for less than at position 1 for row ("row0")
+    comparable = new BinaryComponentComparator(component, 1);
+    //row comparison
+    //'a' < 'o' ('o' is at position 1 of "row0")
+    assertTrue(PrivateCellUtil.compareRow(bbCell, comparable) < 0);
+    assertTrue(PrivateCellUtil.compareRow(kv, comparable) < 0);
+    //value comparison
+    //checking for less than at position 2 for value("abcd")
+    //'a' < 'c'
+    comparable = new BinaryComponentComparator(component, 2);
+    assertTrue(PrivateCellUtil.compareValue(bbCell, comparable) < 0);
+    assertTrue(PrivateCellUtil.compareValue(kv, comparable) < 0);
   }
 }
diff --git a/hbase-protocol-shaded/src/main/protobuf/Comparator.proto b/hbase-protocol-shaded/src/main/protobuf/Comparator.proto
index 55253aae5f5..6a087d3fa65 100644
--- a/hbase-protocol-shaded/src/main/protobuf/Comparator.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/Comparator.proto
@@ -77,3 +77,8 @@ message SubstringComparator {
 message BigDecimalComparator {
   required ByteArrayComparable comparable = 1;
 }
+
+message BinaryComponentComparator {
+  required bytes value = 1;
+  required uint32 offset = 2;
+}
diff --git a/hbase-protocol/src/main/protobuf/Comparator.proto
b/hbase-protocol/src/main/protobuf/Comparator.proto
index 878a179ef32..802021f7ccd 100644
--- a/hbase-protocol/src/main/protobuf/Comparator.proto
+++ b/hbase-protocol/src/main/protobuf/Comparator.proto
@@ -76,3 +76,8 @@ message SubstringComparator {
 message BigDecimalComparator {
   required ByteArrayComparable comparable = 1;
 }
+
+message BinaryComponentComparator {
+  required bytes value = 1;
+  required uint32 offset = 2;
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFiltersWithBinaryComponentComparator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFiltersWithBinaryComponentComparator.java
new file mode 100644
index 00000000000..a3ade6e4889
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFiltersWithBinaryComponentComparator.java
@@ -0,0 +1,293 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.filter;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.CompareOperator;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Exercises {@link RowFilter} and {@link ValueFilter} built on {@link BinaryComponentComparator}.
+ * See https://issues.apache.org/jira/browse/HBASE-22969 - for need of BinaryComponentComparator
+ * The description on jira should also help you in understanding tests implemented in this class
+ */
+@Category(MediumTests.class)
+public class TestFiltersWithBinaryComponentComparator {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestFiltersWithBinaryComponentComparator.class);
+
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestFiltersWithBinaryComponentComparator.class);
+
+  // Row keys are four 4-byte ints laid out back to back as "a+b+c+d".
+  private static final int KEY_LENGTH = 16;
+  private static final int A_OFFSET = 0;
+  private static final int B_OFFSET = 4;
+  private static final int C_OFFSET = 8;
+  private static final int D_OFFSET = 12;
+
+  private final byte[] family = Bytes.toBytes("family");
+  private final byte[] qf = Bytes.toBytes("qf");
+
+  @Rule
+  public TestName name = new TestName();
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.startMiniCluster();
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testRowFilterWithBinaryComponentComparator() throws IOException {
+    //SELECT * from table where a=1 and b > 10 and b < 20 and c > 90 and c < 100 and d=1
+    TableName tableName = TableName.valueOf(name.getMethodName());
+    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
+      generateRows(ht, family, qf);
+      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+      setRowFilters(filterList);
+      List<Cell> result = getResults(ht, createScan(filterList));
+      // b in 11..19 and c in 91..99, one cell per row: 9 * 9 rows must survive.
+      assertEquals(81, result.size());
+      for (Cell cell : result) {
+        assertRowMatches(cell);
+      }
+    }
+  }
+
+  @Test
+  public void testValueFilterWithBinaryComponentComparator() throws IOException {
+    //SELECT * from table where value has 'y' at position 1
+    TableName tableName = TableName.valueOf(name.getMethodName());
+    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
+      generateRows(ht, family, qf);
+      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+      setValueFilters(filterList);
+      Scan scan = new Scan();
+      scan.setFilter(filterList);
+      List<Cell> result = getResults(ht, scan);
+      // 13 values of b times the 7 odd values of c in 89..101 carry "xyz".
+      assertEquals(91, result.size());
+      for (Cell cell : result) {
+        assertValueMatches(cell);
+      }
+    }
+  }
+
+  @Test
+  public void testRowAndValueFilterWithBinaryComponentComparator() throws IOException {
+    //SELECT * from table where a=1 and b > 10 and b < 20 and c > 90 and c < 100 and d=1
+    //and value has 'y' at position 1
+    TableName tableName = TableName.valueOf(name.getMethodName());
+    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
+      generateRows(ht, family, qf);
+      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+      setRowFilters(filterList);
+      setValueFilters(filterList);
+      Scan scan = new Scan();
+      scan.setFilter(filterList);
+      List<Cell> result = getResults(ht, scan);
+      // b in 11..19 times the 5 odd values of c in 91..99.
+      assertEquals(45, result.size());
+      for (Cell cell : result) {
+        assertRowMatches(cell);
+        assertValueMatches(cell);
+      }
+    }
+  }
+
+  /**
+   * Asserts that the row key of the cell satisfies
+   * a = 1 and b > 10 and b < 20 and c > 90 and c < 100 and d = 1.
+   */
+  private static void assertRowMatches(Cell cell) {
+    byte[] key = CellUtil.cloneRow(cell);
+    int a = Bytes.readAsInt(key, A_OFFSET, 4);
+    int b = Bytes.readAsInt(key, B_OFFSET, 4);
+    int c = Bytes.readAsInt(key, C_OFFSET, 4);
+    int d = Bytes.readAsInt(key, D_OFFSET, 4);
+    assertTrue(a == 1 &&
+        b > 10 &&
+        b < 20 &&
+        c > 90 &&
+        c < 100 &&
+        d == 1);
+  }
+
+  /**
+   * Asserts that the value of the cell has 'y' at position 1.
+   */
+  private static void assertValueMatches(Cell cell) {
+    byte[] value = CellUtil.cloneValue(cell);
+    assertTrue(Bytes.toString(value).charAt(1) == 'y');
+  }
+
+  /**
+   * Since we are trying to emulate
+   * SQL: SELECT * from table where a = 1 and b > 10 and b < 20 and
+   * c > 90 and c < 100 and d = 1
+   * We are generating rows with:
+   * a = 1, b >=9 and b < 22, c >= 89 and c < 102, and d = 1
+   * At the end the table will look something like this:
+   * ------------
+   *  a| b|  c|d|
+   * ------------
+   *  1| 9| 89|1|family:qf|xyz|
+   *  -----------
+   *  1| 9| 90|1|family:qf|abc|
+   *  -----------
+   *  1| 9| 91|1|family:qf|xyz|
+   *  -------------------------
+   *  .
+   *  -------------------------
+   *  .
+   *  -------------------------
+   *  1|21|101|1|family:qf|xyz|
+   */
+  private void generateRows(Table ht, byte[] family, byte[] qf)
+      throws IOException {
+    for (int a = 1; a < 2; ++a) {
+      for (int b = 9; b < 22; ++b) {
+        for (int c = 89; c < 102; ++c) {
+          for (int d = 1; d < 2; ++d) {
+            byte[] key = new byte[KEY_LENGTH];
+            Bytes.putInt(key, A_OFFSET, a);
+            Bytes.putInt(key, B_OFFSET, b);
+            Bytes.putInt(key, C_OFFSET, c);
+            Bytes.putInt(key, D_OFFSET, d);
+            // Even c gets "abc", odd c gets "xyz".
+            String value = (c % 2 == 0) ? "abc" : "xyz";
+            Put row = new Put(key);
+            row.addColumn(family, qf, Bytes.toBytes(value));
+            // Actually write the row; without this the table stays empty and every
+            // test loops over zero cells.
+            ht.put(row);
+            LOG.info("added row: {} with value '{}'", Hex.encodeHexString(key), value);
+          }
+        }
+      }
+    }
+    TEST_UTIL.flush();
+  }
+
+  /**
+   * Adds row filters for b > 10, b < 20, c > 90, c < 100 and d == 1.
+   * No filter is added for a.
+   */
+  private void setRowFilters(FilterList filterList) {
+    //offset for b as it is second component of "a+b+c+d"
+    //'a' is at offset 0
+    byte[] b10 = Bytes.toBytes(10); //tests b > 10
+    filterList.addFilter(new RowFilter(CompareOperator.GREATER,
+        new BinaryComponentComparator(b10, B_OFFSET)));
+
+    byte[] b20 = Bytes.toBytes(20); //tests b < 20
+    filterList.addFilter(new RowFilter(CompareOperator.LESS,
+        new BinaryComponentComparator(b20, B_OFFSET)));
+
+    //offset for c as it is third component of "a+b+c+d"
+    byte[] c90 = Bytes.toBytes(90); //tests c > 90
+    filterList.addFilter(new RowFilter(CompareOperator.GREATER,
+        new BinaryComponentComparator(c90, C_OFFSET)));
+
+    byte[] c100 = Bytes.toBytes(100); //tests c < 100
+    filterList.addFilter(new RowFilter(CompareOperator.LESS,
+        new BinaryComponentComparator(c100, C_OFFSET)));
+
+    //offset for d as it is fourth component of "a+b+c+d"
+    byte[] d1 = Bytes.toBytes(1); //tests d == 1
+    filterList.addFilter(new RowFilter(CompareOperator.EQUAL,
+        new BinaryComponentComparator(d1, D_OFFSET)));
+  }
+
+  /**
+   * We have rows with either "abc" or "xyz".
+   * We want values which have 'y' at second position
+   * of the string.
+   * As a result only values with "xyz" shall be returned
+   */
+  private void setValueFilters(FilterList filterList) {
+    int offset = 1;
+    byte[] y = Bytes.toBytes("y");
+    Filter yFilter = new ValueFilter(CompareOperator.EQUAL,
+        new BinaryComponentComparator(y, offset));
+    filterList.addFilter(yFilter);
+  }
+
+  /**
+   * Builds a scan bounded by start and stop rows with the given filters attached.
+   */
+  private Scan createScan(FilterList list) {
+    //build start and end key for scan
+    byte[] startKey = new byte[KEY_LENGTH]; //key size with four ints
+    Bytes.putInt(startKey, A_OFFSET, 1); //a=1, takes care of a = 1
+    Bytes.putInt(startKey, B_OFFSET, 11); //b=11, takes care of b > 10
+    Bytes.putInt(startKey, C_OFFSET, 91); //c=91,
+    Bytes.putInt(startKey, D_OFFSET, 1); //d=1,
+
+    byte[] endKey = new byte[KEY_LENGTH];
+    Bytes.putInt(endKey, A_OFFSET, 1); //a=1, takes care of a = 1
+    Bytes.putInt(endKey, B_OFFSET, 20); //b=20, takes care of b < 20
+    Bytes.putInt(endKey, C_OFFSET, 100); //c=100,
+    Bytes.putInt(endKey, D_OFFSET, 1); //d=1,
+
+    //setup scan
+    Scan scan = new Scan().withStartRow(startKey).withStopRow(endKey);
+    scan.setFilter(list);
+    return scan;
+  }
+
+  /**
+   * Runs the scan and collects every returned cell; the scanner is closed even on failure.
+   */
+  private List<Cell> getResults(Table ht, Scan scan) throws IOException {
+    List<Cell> results = new ArrayList<>();
+    try (ResultScanner scanner = ht.getScanner(scan)) {
+      Result r;
+      while ((r = scanner.next()) != null) {
+        results.addAll(r.listCells());
+      }
+    }
+    return results;
+  }
+
+}
diff --git a/src/main/asciidoc/_chapters/architecture.adoc b/src/main/asciidoc/_chapters/architecture.adoc
index 06119b5fc86..c942e5418e8 100644
--- a/src/main/asciidoc/_chapters/architecture.adoc
+++ b/src/main/asciidoc/_chapters/architecture.adoc
@@ -410,6 +410,20 @@ See link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryP
 See link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComparator.html[BinaryComparator].
+[[client.filter.cvp.bcc]]
+==== BinaryComponentComparator
+
+link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComponentComparator.html[BinaryComponentComparator] can be used to compare a specific value at a specific location within the cell value. The comparison can be done for both ASCII and binary data.
+
+[source,java]
+----
+byte[] partialValue = Bytes.toBytes("partial_value");
+int partialValueOffset = 4; // example: zero-based byte offset of the component within the cell value
+Filter partialValueFilter = new ValueFilter(CompareOperator.GREATER,
+    new BinaryComponentComparator(partialValue, partialValueOffset));
+----
+See link:https://issues.apache.org/jira/browse/HBASE-22969[HBASE-22969] for other use cases and details.
+
 [[client.filter.kvm]]
 === KeyValue Metadata
@@ -535,6 +549,8 @@ Note: Introduced in HBase 0.92
 It is generally a better idea to use the startRow/stopRow methods on Scan for row selection, however link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/RowFilter.html[RowFilter] can also be used.
+You can supplement a scan (both bounded and unbounded) with a RowFilter constructed from link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComponentComparator.html[BinaryComponentComparator] to further filter rows in or out. See link:https://issues.apache.org/jira/browse/HBASE-22969[HBASE-22969] for use cases and other details.
+
 [[client.filter.utility]]
 === Utility