HBASE-1005 Regex and string comparison operators for ColumnValueFilter
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@718837 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ddc345efa6
commit
6c89d36cef
|
@ -76,6 +76,7 @@ Release 0.19.0 - Unreleased
|
|||
TRUNK/0.19.0.
|
||||
HBASE-1003 If cell exceeds TTL but not VERSIONs, will not be removed during
|
||||
major compaction
|
||||
HBASE-1005 Regex and string comparison operators for ColumnValueFilter
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-901 Add a limit to key length, check key and value length on client side
|
||||
|
|
|
@ -53,11 +53,7 @@ public class ColumnValueFilter implements RowFilterInterface {
|
|||
/** greater than or equal to */
|
||||
GREATER_OR_EQUAL,
|
||||
/** greater than */
|
||||
GREATER,
|
||||
// Below are more specific operators.
|
||||
/** sub-string. Case insensitive. */
|
||||
SUB_STRING;
|
||||
|
||||
GREATER;
|
||||
}
|
||||
|
||||
private byte[] columnName;
|
||||
|
@ -143,12 +139,6 @@ public class ColumnValueFilter implements RowFilterInterface {
|
|||
}
|
||||
|
||||
private boolean filterColumnValue(final byte [] data) {
|
||||
// Special case for Substring operator
|
||||
if (compareOp == CompareOp.SUB_STRING) {
|
||||
return !Bytes.toString(data).toLowerCase().contains(
|
||||
(Bytes.toString(value)).toLowerCase());
|
||||
}
|
||||
|
||||
int compareResult;
|
||||
if (comparator != null) {
|
||||
compareResult = comparator.compareTo(data);
|
||||
|
@ -179,10 +169,11 @@ public class ColumnValueFilter implements RowFilterInterface {
|
|||
}
|
||||
|
||||
public boolean filterRow(final SortedMap<byte[], Cell> columns) {
|
||||
if (columns == null)
|
||||
return false;
|
||||
if (filterIfColumnMissing) {
|
||||
return !columns.containsKey(columnName);
|
||||
}
|
||||
|
||||
// Otherwise we must do the filter here
|
||||
Cell colCell = columns.get(columnName);
|
||||
if (colCell == null) {
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
/**
|
||||
* This comparator is for use with ColumnValueFilter, for filtering based on
|
||||
* the value of a given column. Use it to test if a given regular expression
|
||||
* matches a cell value in the column.
|
||||
* <p>
|
||||
* Only EQUAL or NOT_EQUAL tests are valid with this comparator.
|
||||
* <p>
|
||||
* For example:
|
||||
* <p>
|
||||
* <pre>
|
||||
* ColumnValueFilter cvf =
|
||||
* new ColumnValueFilter("col",
|
||||
* ColumnValueFilter.CompareOp.EQUAL,
|
||||
* new RegexStringComparator(
|
||||
* // v4 IP address
|
||||
* "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
|
||||
* "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
|
||||
* "|" +
|
||||
* // v6 IP address
|
||||
* "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
|
||||
* "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
|
||||
* </pre>
|
||||
*/
|
||||
public class RegexStringComparator implements WritableByteArrayComparable {
|
||||
|
||||
private Pattern pattern;
|
||||
|
||||
/** Nullary constructor for Writable */
|
||||
public RegexStringComparator() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @param expr a valid regular expression
|
||||
*/
|
||||
public RegexStringComparator(String expr) {
|
||||
this.pattern = Pattern.compile(expr);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(byte[] value) {
|
||||
// Use find() for subsequence match instead of matches() (full sequence
|
||||
// match) to adhere to the principle of least surprise.
|
||||
return pattern.matcher(Bytes.toString(value)).find() ? 1 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
this.pattern = Pattern.compile(in.readUTF());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeUTF(pattern.toString());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
/**
|
||||
* This comparator is for use with ColumnValueFilter, for filtering based on
|
||||
* the value of a given column. Use it to test if a given substring appears
|
||||
* in a cell value in the column. The comparison is case insensitive.
|
||||
* <p>
|
||||
* Only EQUAL or NOT_EQUAL tests are valid with this comparator.
|
||||
* <p>
|
||||
* For example:
|
||||
* <p>
|
||||
* <pre>
|
||||
* ColumnValueFilter cvf =
|
||||
* new ColumnValueFilter("col", ColumnValueFilter.CompareOp.EQUAL,
|
||||
* new SubstringComparator("substr"));
|
||||
* </pre>
|
||||
*/
|
||||
public class SubstringComparator implements WritableByteArrayComparable {
|
||||
|
||||
private String substr;
|
||||
|
||||
/** Nullary constructor for Writable */
|
||||
public SubstringComparator() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @param substr the substring
|
||||
*/
|
||||
public SubstringComparator(String substr) {
|
||||
this.substr = substr.toLowerCase();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(byte[] value) {
|
||||
return Bytes.toString(value).toLowerCase().contains(substr) ? 1 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
substr = in.readUTF();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeUTF(substr);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,143 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Tests the stop row filter
|
||||
*/
|
||||
public class TestColumnValueFilter extends TestCase {
|
||||
|
||||
private static final byte[] ROW = Bytes.toBytes("test");
|
||||
private static final byte[] COLUMN = Bytes.toBytes("test:foo");
|
||||
private static final byte[] VAL_1 = Bytes.toBytes("a");
|
||||
private static final byte[] VAL_2 = Bytes.toBytes("ab");
|
||||
private static final byte[] VAL_3 = Bytes.toBytes("abc");
|
||||
private static final byte[] VAL_4 = Bytes.toBytes("abcd");
|
||||
private static final byte[] FULLSTRING_1 =
|
||||
Bytes.toBytes("The quick brown fox jumps over the lazy dog.");
|
||||
private static final byte[] FULLSTRING_2 =
|
||||
Bytes.toBytes("The slow grey fox trips over the lazy dog.");
|
||||
private static final String QUICK_SUBSTR = "quick";
|
||||
private static final String QUICK_REGEX = "[q][u][i][c][k]";
|
||||
|
||||
private RowFilterInterface basicFilterNew() {
|
||||
return new ColumnValueFilter(COLUMN,
|
||||
ColumnValueFilter.CompareOp.GREATER_OR_EQUAL, VAL_2);
|
||||
}
|
||||
|
||||
private RowFilterInterface substrFilterNew() {
|
||||
return new ColumnValueFilter(COLUMN, ColumnValueFilter.CompareOp.EQUAL,
|
||||
new SubstringComparator(QUICK_SUBSTR));
|
||||
}
|
||||
|
||||
private RowFilterInterface regexFilterNew() {
|
||||
return new ColumnValueFilter(COLUMN, ColumnValueFilter.CompareOp.EQUAL,
|
||||
new RegexStringComparator(QUICK_REGEX));
|
||||
}
|
||||
|
||||
private void basicFilterTests(RowFilterInterface filter)
|
||||
throws Exception {
|
||||
assertTrue("basicFilter1", filter.filterColumn(ROW, COLUMN, VAL_1));
|
||||
assertFalse("basicFilter2", filter.filterColumn(ROW, COLUMN, VAL_2));
|
||||
assertFalse("basicFilter3", filter.filterColumn(ROW, COLUMN, VAL_3));
|
||||
assertFalse("basicFilter4", filter.filterColumn(ROW, COLUMN, VAL_4));
|
||||
assertFalse("basicFilterAllRemaining", filter.filterAllRemaining());
|
||||
assertFalse("basicFilterNotNull", filter.filterRow(null));
|
||||
}
|
||||
|
||||
private void substrFilterTests(RowFilterInterface filter)
|
||||
throws Exception {
|
||||
assertTrue("substrTrue", filter.filterColumn(ROW, COLUMN, FULLSTRING_1));
|
||||
assertFalse("substrFalse", filter.filterColumn(ROW, COLUMN, FULLSTRING_2));
|
||||
assertFalse("substrFilterAllRemaining", filter.filterAllRemaining());
|
||||
assertFalse("substrFilterNotNull", filter.filterRow(null));
|
||||
}
|
||||
|
||||
private void regexFilterTests(RowFilterInterface filter)
|
||||
throws Exception {
|
||||
assertTrue("regexTrue", filter.filterColumn(ROW, COLUMN, FULLSTRING_1));
|
||||
assertFalse("regexFalse", filter.filterColumn(ROW, COLUMN, FULLSTRING_2));
|
||||
assertFalse("regexFilterAllRemaining", filter.filterAllRemaining());
|
||||
assertFalse("regexFilterNotNull", filter.filterRow(null));
|
||||
}
|
||||
|
||||
private RowFilterInterface serializationTest(RowFilterInterface filter)
|
||||
throws Exception {
|
||||
// Decompose filter to bytes.
|
||||
ByteArrayOutputStream stream = new ByteArrayOutputStream();
|
||||
DataOutputStream out = new DataOutputStream(stream);
|
||||
filter.write(out);
|
||||
out.close();
|
||||
byte[] buffer = stream.toByteArray();
|
||||
|
||||
// Recompose filter.
|
||||
DataInputStream in =
|
||||
new DataInputStream(new ByteArrayInputStream(buffer));
|
||||
RowFilterInterface newFilter = new ColumnValueFilter();
|
||||
newFilter.readFields(in);
|
||||
|
||||
return newFilter;
|
||||
}
|
||||
|
||||
RowFilterInterface basicFilter;
|
||||
RowFilterInterface substrFilter;
|
||||
RowFilterInterface regexFilter;
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
basicFilter = basicFilterNew();
|
||||
substrFilter = substrFilterNew();
|
||||
regexFilter = regexFilterNew();
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests identification of the stop row
|
||||
* @throws Exception
|
||||
*/
|
||||
public void testStop() throws Exception {
|
||||
basicFilterTests(basicFilter);
|
||||
substrFilterTests(substrFilter);
|
||||
regexFilterTests(regexFilter);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests serialization
|
||||
* @throws Exception
|
||||
*/
|
||||
public void testSerialization() throws Exception {
|
||||
RowFilterInterface newFilter = serializationTest(basicFilter);
|
||||
basicFilterTests(newFilter);
|
||||
newFilter = serializationTest(substrFilter);
|
||||
substrFilterTests(newFilter);
|
||||
newFilter = serializationTest(regexFilter);
|
||||
regexFilterTests(newFilter);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue