diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/BinaryComponentComparator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/BinaryComponentComparator.java
new file mode 100644
index 00000000000..f78f79e419f
--- /dev/null
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/BinaryComponentComparator.java
@@ -0,0 +1,125 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.filter;
+
+import com.google.protobuf.ByteString;
+import com.google.protobuf.InvalidProtocolBufferException;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * A comparator which compares against a specified byte array, but only compares a
+ * specific portion of the byte array, namely the bytes that start at a fixed offset.
+ * For the rest it is similar to {@link BinaryComparator}.
+ */
+@InterfaceAudience.Public
+@SuppressWarnings("ComparableType")
+public class BinaryComponentComparator extends ByteArrayComparable {
+  private final int offset; // offset of the component from the beginning
+
+  /**
+   * Creates a comparator for a component that sits at a fixed offset.
+   * @param value value of the component
+   * @param offset offset of the component from the beginning
+   */
+  public BinaryComponentComparator(byte[] value, int offset) {
+    super(value);
+    this.offset = offset;
+  }
+
+  @Override
+  public int compareTo(byte[] value) {
+    return compareTo(value, 0, value.length);
+  }
+
+  @Override
+  public int compareTo(byte[] value, int offset, int length) {
+    // NOTE(review): length is never consulted, so the caller has to make sure that the
+    // component fits behind offset + this.offset; confirm whether a bounds check is wanted.
+    return Bytes.compareTo(this.value, 0, this.value.length, value, offset + this.offset,
+      this.value.length);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    if (!(other instanceof BinaryComponentComparator)) {
+      return false;
+    }
+    BinaryComponentComparator that = (BinaryComponentComparator) other;
+    // Compare the raw bytes; compareTo() would look for the component at this.offset.
+    return offset == that.offset && Bytes.equals(this.value, that.value);
+  }
+
+  @Override
+  public int hashCode() {
+    // Built from the same two fields equals() inspects.
+    return 31 * Bytes.hashCode(this.value) + offset;
+  }
+
+  /**
+   * @return The comparator serialized using pb
+   */
+  @Override
+  public byte[] toByteArray() {
+    return ComparatorProtos.BinaryComponentComparator.newBuilder()
+      .setValue(ByteString.copyFrom(this.value))
+      .setOffset(this.offset)
+      .build().toByteArray();
+  }
+
+  /**
+   * @param pbBytes A pb serialized {@link BinaryComponentComparator} instance
+   * @return An instance of {@link BinaryComponentComparator} made from bytes
+   * @throws DeserializationException if pbBytes cannot be parsed as a protobuf message
+   * @see #toByteArray
+   */
+  public static BinaryComponentComparator parseFrom(final byte[] pbBytes)
+    throws DeserializationException {
+    try {
+      ComparatorProtos.BinaryComponentComparator proto =
+        ComparatorProtos.BinaryComponentComparator.parseFrom(pbBytes);
+      return new BinaryComponentComparator(proto.getValue().toByteArray(), proto.getOffset());
+    } catch (InvalidProtocolBufferException e) {
+      throw new DeserializationException(e);
+    }
+  }
+
+  /**
+   * @param other parameter to compare against
+   * @return true if and only if the fields of the comparator that are serialized (value
+   *   and offset) are equal to the corresponding fields in other. Used for testing.
+   */
+  @Override
+  boolean areSerializedFieldsEqual(ByteArrayComparable other) {
+    if (other == this) {
+      return true;
+    }
+    if (!(other instanceof BinaryComponentComparator)) {
+      return false;
+    }
+    return offset == ((BinaryComponentComparator) other).offset
+      && super.areSerializedFieldsEqual(other);
+  }
+}
diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java
index 3835948b930..868f3b7fda4 100644
--- a/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java
+++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java
@@ -104,6 +104,59 @@ public class TestComparators {
assertTrue(PrivateCellUtil.qualifierStartsWith(kv, q1));
assertFalse(PrivateCellUtil.qualifierStartsWith(kv, q2));
assertFalse(PrivateCellUtil.qualifierStartsWith(kv, Bytes.toBytes("longerthanthequalifier")));
+
+ //Binary component comparisons
+ byte[] val = Bytes.toBytes("abcd");
+ kv = new KeyValue(r0, f, q1, val);
+ buffer = ByteBuffer.wrap(kv.getBuffer());
+ bbCell = new ByteBufferKeyValue(buffer, 0, buffer.remaining());
+
+ //equality check
+ //row comparison
+ //row is "row0"(set by variable r0)
+ //and we are checking for equality to 'o' at position 1
+ //'r' is at position 0.
+ byte[] component = Bytes.toBytes("o");
+ comparable = new BinaryComponentComparator(component, 1);
+ assertEquals(0, PrivateCellUtil.compareRow(bbCell, comparable));
+ assertEquals(0, PrivateCellUtil.compareRow(kv, comparable));
+ //value comparison
+ //value is "abcd"(set by variable val).
+ //and we are checking for equality to 'c' at position 2.
+ //'a' is at position 0.
+ component = Bytes.toBytes("c");
+ comparable = new BinaryComponentComparator(component, 2);
+ assertEquals(0,PrivateCellUtil.compareValue(bbCell, comparable));
+ assertEquals(0,PrivateCellUtil.compareValue(kv, comparable));
+
+ //greater than
+ component = Bytes.toBytes("z");
+ //checking for greater than at position 1.
+ //for both row("row0") and value("abcd")
+ //'z' > 'o' (position 1 of "row0")
+ comparable = new BinaryComponentComparator(component, 1);
+ //row comparison
+ assertTrue(PrivateCellUtil.compareRow(bbCell, comparable) > 0);
+ assertTrue(PrivateCellUtil.compareRow(kv, comparable) > 0);
+ //value comparison
+ //'z' > 'b' (position 1 of "abcd")
+ assertTrue(PrivateCellUtil.compareValue(bbCell, comparable) > 0);
+ assertTrue(PrivateCellUtil.compareValue(kv, comparable) > 0);
+
+ //less than
+ component = Bytes.toBytes("a");
+ //checking for less than at position 1 for row ("row0")
+ comparable = new BinaryComponentComparator(component, 1);
+ //row comparison
+ //'a' < 'o' (position 1 of "row0")
+ assertTrue(PrivateCellUtil.compareRow(bbCell, comparable) < 0);
+ assertTrue(PrivateCellUtil.compareRow(kv, comparable) < 0);
+ //value comparison
+ //checking for less than at position 2 for value("abcd")
+ //'a' < 'c'
+ comparable = new BinaryComponentComparator(component, 2);
+ assertTrue(PrivateCellUtil.compareValue(bbCell, comparable) < 0);
+ assertTrue(PrivateCellUtil.compareValue(kv, comparable) < 0);
}
}
diff --git a/hbase-protocol-shaded/src/main/protobuf/Comparator.proto b/hbase-protocol-shaded/src/main/protobuf/Comparator.proto
index 55253aae5f5..6a087d3fa65 100644
--- a/hbase-protocol-shaded/src/main/protobuf/Comparator.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/Comparator.proto
@@ -77,3 +77,8 @@ message SubstringComparator {
message BigDecimalComparator {
required ByteArrayComparable comparable = 1;
}
+
+message BinaryComponentComparator {
+ required bytes value = 1; // bytes of the component to compare against
+ required uint32 offset = 2; // offset of the component from the beginning
+}
diff --git a/hbase-protocol/src/main/protobuf/Comparator.proto b/hbase-protocol/src/main/protobuf/Comparator.proto
index 878a179ef32..802021f7ccd 100644
--- a/hbase-protocol/src/main/protobuf/Comparator.proto
+++ b/hbase-protocol/src/main/protobuf/Comparator.proto
@@ -76,3 +76,8 @@ message SubstringComparator {
message BigDecimalComparator {
required ByteArrayComparable comparable = 1;
}
+
+message BinaryComponentComparator {
+ required bytes value = 1; // bytes of the component to compare against
+ required uint32 offset = 2; // offset of the component from the beginning
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFiltersWithBinaryComponentComparator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFiltersWithBinaryComponentComparator.java
new file mode 100644
index 00000000000..a3ade6e4889
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFiltersWithBinaryComponentComparator.java
@@ -0,0 +1,293 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.filter;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.CompareOperator;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tests {@link RowFilter} and {@link ValueFilter} used with {@link BinaryComponentComparator}.
+ * See https://issues.apache.org/jira/browse/HBASE-22969 for why BinaryComponentComparator exists;
+ * the description on jira should also help in understanding the tests implemented here.
+ */
+@Category(MediumTests.class)
+public class TestFiltersWithBinaryComponentComparator {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestFiltersWithBinaryComponentComparator.class);
+
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final Logger LOG =
+    LoggerFactory.getLogger(TestFiltersWithBinaryComponentComparator.class);
+  private byte[] family = Bytes.toBytes("family");
+  private byte[] qf = Bytes.toBytes("qf");
+  private TableName tableName;
+  //row keys are four 4-byte ints "a+b+c+d"; these are the byte offsets of each component
+  private int aOffset = 0;
+  private int bOffset = 4;
+  private int cOffset = 8;
+  private int dOffset = 12;
+
+  @Rule
+  public TestName name = new TestName();
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.startMiniCluster();
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Scans a bounded key range with row filters only and expects exactly the rows whose
+   * b and c components lie strictly between 10 and 20 and between 90 and 100.
+   */
+  @Test
+  public void testRowFilterWithBinaryComponentComparator() throws IOException {
+    //SELECT * from table where a=1 and b > 10 and b < 20 and c > 90 and c < 100 and d=1
+    tableName = TableName.valueOf(name.getMethodName());
+    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
+      generateRows(ht, family, qf);
+      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+      setRowFilters(filterList);
+      List<Cell> result = getResults(ht, createScan(filterList));
+      //9 values of b (11..19) times 9 values of c (91..99), one cell per row
+      assertTrue("unexpected number of cells: " + result.size(), result.size() == 81);
+      for (Cell cell : result) {
+        assertRowMatches(CellUtil.cloneRow(cell));
+      }
+    }
+  }
+
+  /**
+   * Scans the whole table with a value filter only and expects just the cells whose
+   * value has 'y' as its second byte.
+   */
+  @Test
+  public void testValueFilterWithBinaryComponentComparator() throws IOException {
+    //SELECT * from table where value has 'y' at position 1
+    tableName = TableName.valueOf(name.getMethodName());
+    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
+      generateRows(ht, family, qf);
+      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+      setValueFilters(filterList);
+      Scan scan = new Scan();
+      scan.setFilter(filterList);
+      List<Cell> result = getResults(ht, scan);
+      //13 values of b (9..21) times the 7 odd values of c (89..101), which hold "xyz"
+      assertTrue("unexpected number of cells: " + result.size(), result.size() == 91);
+      for (Cell cell : result) {
+        byte[] value = CellUtil.cloneValue(cell);
+        assertTrue(Bytes.toString(value).charAt(1) == 'y');
+      }
+    }
+  }
+
+  /**
+   * Scans the whole table with row and value filters combined and expects only the cells
+   * that satisfy both sets of conditions.
+   */
+  @Test
+  public void testRowAndValueFilterWithBinaryComponentComparator() throws IOException {
+    //SELECT * from table where a=1 and b > 10 and b < 20 and c > 90 and c < 100 and d=1
+    //and value has 'y' at position 1"
+    tableName = TableName.valueOf(name.getMethodName());
+    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
+      generateRows(ht, family, qf);
+      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+      setRowFilters(filterList);
+      setValueFilters(filterList);
+      Scan scan = new Scan();
+      scan.setFilter(filterList);
+      List<Cell> result = getResults(ht, scan);
+      //9 values of b (11..19) times the 5 odd values of c (91..99)
+      assertTrue("unexpected number of cells: " + result.size(), result.size() == 45);
+      for (Cell cell : result) {
+        assertRowMatches(CellUtil.cloneRow(cell));
+        byte[] value = CellUtil.cloneValue(cell);
+        assertTrue(Bytes.toString(value).charAt(1) == 'y');
+      }
+    }
+  }
+
+  /**
+   * Asserts that a row key decodes to {@code a == 1, 10 < b < 20, 90 < c < 100, d == 1},
+   * i.e. that the row satisfies the conditions the row filter tests select on.
+   */
+  private void assertRowMatches(byte[] key) {
+    int a = Bytes.readAsInt(key, aOffset, 4);
+    int b = Bytes.readAsInt(key, bOffset, 4);
+    int c = Bytes.readAsInt(key, cOffset, 4);
+    int d = Bytes.readAsInt(key, dOffset, 4);
+    assertTrue(a == 1 && b > 10 && b < 20 && c > 90 && c < 100 && d == 1);
+  }
+
+  /**
+   * Since we are trying to emulate
+   * SQL: SELECT * from table where a = 1 and b > 10 and b < 20 and
+   * c > 90 and c < 100 and d = 1
+   * We are generating rows with:
+   * a = 1, b >=9 and b < 22, c >= 89 and c < 102, and d = 1
+   * At the end the table will look something like this:
+   * ------------
+   * a| b| c|d|
+   * ------------
+   * 1| 9| 89|1|family:qf|xyz|
+   * -----------
+   * 1| 9| 90|1|family:qf|abc|
+   * -----------
+   * 1| 9| 91|1|family:qf|xyz|
+   * -------------------------
+   * .
+   * -------------------------
+   * .
+   * -------------------------
+   * 1|21|101|1|family:qf|xyz|
+   */
+  private void generateRows(Table ht, byte[] family, byte[] qf)
+    throws IOException {
+    for (int a = 1; a < 2; ++a) {
+      for (int b = 9; b < 22; ++b) {
+        for (int c = 89; c < 102; ++c) {
+          for (int d = 1; d < 2; ++d) {
+            byte[] key = new byte[16];
+            Bytes.putInt(key, aOffset, a);
+            Bytes.putInt(key, bOffset, b);
+            Bytes.putInt(key, cOffset, c);
+            Bytes.putInt(key, dOffset, d);
+            String value = (c % 2 == 0) ? "abc" : "xyz";
+            Put row = new Put(key);
+            row.addColumn(family, qf, Bytes.toBytes(value));
+            //actually write the row; otherwise every scan comes back empty
+            ht.put(row);
+            LOG.info("added row: {} with value '{}'", Hex.encodeHexString(key), value);
+          }
+        }
+      }
+    }
+    TEST_UTIL.flush();
+  }
+
+  /**
+   * Adds one {@link RowFilter} per condition of
+   * {@code b > 10 and b < 20 and c > 90 and c < 100 and d = 1}.
+   * Every filter compares a single 4 byte component of the "a+b+c+d" row key.
+   */
+  private void setRowFilters(FilterList filterList) {
+    //bOffset, cOffset and dOffset are the positions of b, c and d inside the row key
+    //tests b > 10
+    filterList.addFilter(new RowFilter(CompareOperator.GREATER,
+      new BinaryComponentComparator(Bytes.toBytes(10), bOffset)));
+    //tests b < 20
+    filterList.addFilter(new RowFilter(CompareOperator.LESS,
+      new BinaryComponentComparator(Bytes.toBytes(20), bOffset)));
+    //tests c > 90
+    filterList.addFilter(new RowFilter(CompareOperator.GREATER,
+      new BinaryComponentComparator(Bytes.toBytes(90), cOffset)));
+    //tests c < 100
+    filterList.addFilter(new RowFilter(CompareOperator.LESS,
+      new BinaryComponentComparator(Bytes.toBytes(100), cOffset)));
+    //tests d == 1
+    filterList.addFilter(new RowFilter(CompareOperator.EQUAL,
+      new BinaryComponentComparator(Bytes.toBytes(1), dOffset)));
+  }
+
+  /**
+   * We have rows with either "abc" or "xyz".
+   * We want values which have 'y' at second position
+   * of the string.
+   * As a result only values with "xyz" shall be returned
+   */
+  private void setValueFilters(FilterList filterList) {
+    int offset = 1; //index of the byte to inspect inside the cell value
+    byte[] y = Bytes.toBytes("y");
+    Filter yFilter = new ValueFilter(CompareOperator.EQUAL,
+      new BinaryComponentComparator(y, offset));
+    filterList.addFilter(yFilter);
+  }
+
+  /**
+   * Builds a scan from start key (1, 11, 91, 1) to stop key (1, 20, 100, 1) with the
+   * given filters attached. The key range only trims the scan; the individual
+   * conditions on b, c and d are enforced by the filters.
+   */
+  private Scan createScan(FilterList list) {
+    //build start and end key for scan
+    byte[] startKey = new byte[16]; //key size with four ints
+    Bytes.putInt(startKey, aOffset, 1); //a=1, takes care of a = 1
+    Bytes.putInt(startKey, bOffset, 11); //b=11, takes care of b > 10
+    Bytes.putInt(startKey, cOffset, 91); //c=91,
+    Bytes.putInt(startKey, dOffset, 1); //d=1,
+
+    byte[] endKey = new byte[16];
+    Bytes.putInt(endKey, aOffset, 1); //a=1, takes care of a = 1
+    Bytes.putInt(endKey, bOffset, 20); //b=20, takes care of b < 20
+    Bytes.putInt(endKey, cOffset, 100); //c=100,
+    Bytes.putInt(endKey, dOffset, 1); //d=1,
+
+    //setup scan
+    Scan scan = new Scan().withStartRow(startKey).withStopRow(endKey);
+    scan.setFilter(list);
+    return scan;
+  }
+
+  /**
+   * Runs the scan to completion and returns every cell of every result, in scan order.
+   * The scanner is closed even if reading fails.
+   */
+  private List<Cell> getResults(Table ht, Scan scan) throws IOException {
+    List<Cell> results = new ArrayList<>();
+    try (ResultScanner scanner = ht.getScanner(scan)) {
+      for (Result r = scanner.next(); r != null; r = scanner.next()) {
+        results.addAll(r.listCells());
+      }
+    }
+    return results;
+  }
+}
diff --git a/src/main/asciidoc/_chapters/architecture.adoc b/src/main/asciidoc/_chapters/architecture.adoc
index 06119b5fc86..c942e5418e8 100644
--- a/src/main/asciidoc/_chapters/architecture.adoc
+++ b/src/main/asciidoc/_chapters/architecture.adoc
@@ -410,6 +410,20 @@ See link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryP
See link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComparator.html[BinaryComparator].
+[[client.filter.cvp.bcc]]
+==== BinaryComponentComparator
+
+link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComponentComparator.html[BinaryComponentComparator] can be used to compare a specific value at a specific location within the cell value. The comparison can be done for both ASCII and binary data.
+
+[source,java]
+----
+byte[] partialValue = Bytes.toBytes("partial_value");
+ int partialValueOffset = 4; // example: position in the cell value where the comparison starts
+ Filter partialValueFilter = new ValueFilter(CompareFilter.CompareOp.GREATER,
+ new BinaryComponentComparator(partialValue,partialValueOffset));
+----
+See link:https://issues.apache.org/jira/browse/HBASE-22969[HBASE-22969] for other use cases and details.
+
[[client.filter.kvm]]
=== KeyValue Metadata
@@ -535,6 +549,8 @@ Note: Introduced in HBase 0.92
It is generally a better idea to use the startRow/stopRow methods on Scan for row selection, however link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/RowFilter.html[RowFilter] can also be used.
+You can supplement a scan (both bounded and unbounded) with RowFilter constructed from link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComponentComparator.html[BinaryComponentComparator] for further filtering out or filtering in rows. See link:https://issues.apache.org/jira/browse/HBASE-22969[HBASE-22969] for use cases and other details.
+
[[client.filter.utility]]
=== Utility
| | | | |