HBASE-1005 Regex and string comparison operators for ColumnValueFilter
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@718837 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ddc345efa6
commit
6c89d36cef
|
@ -76,6 +76,7 @@ Release 0.19.0 - Unreleased
|
||||||
TRUNK/0.19.0.
|
TRUNK/0.19.0.
|
||||||
HBASE-1003 If cell exceeds TTL but not VERSIONs, will not be removed during
|
HBASE-1003 If cell exceeds TTL but not VERSIONs, will not be removed during
|
||||||
major compaction
|
major compaction
|
||||||
|
HBASE-1005 Regex and string comparison operators for ColumnValueFilter
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HBASE-901 Add a limit to key length, check key and value length on client side
|
HBASE-901 Add a limit to key length, check key and value length on client side
|
||||||
|
|
|
@ -53,11 +53,7 @@ public class ColumnValueFilter implements RowFilterInterface {
|
||||||
/** greater than or equal to */
|
/** greater than or equal to */
|
||||||
GREATER_OR_EQUAL,
|
GREATER_OR_EQUAL,
|
||||||
/** greater than */
|
/** greater than */
|
||||||
GREATER,
|
GREATER;
|
||||||
// Below are more specific operators.
|
|
||||||
/** sub-string. Case insensitive. */
|
|
||||||
SUB_STRING;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] columnName;
|
private byte[] columnName;
|
||||||
|
@ -143,12 +139,6 @@ public class ColumnValueFilter implements RowFilterInterface {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean filterColumnValue(final byte [] data) {
|
private boolean filterColumnValue(final byte [] data) {
|
||||||
// Special case for Substring operator
|
|
||||||
if (compareOp == CompareOp.SUB_STRING) {
|
|
||||||
return !Bytes.toString(data).toLowerCase().contains(
|
|
||||||
(Bytes.toString(value)).toLowerCase());
|
|
||||||
}
|
|
||||||
|
|
||||||
int compareResult;
|
int compareResult;
|
||||||
if (comparator != null) {
|
if (comparator != null) {
|
||||||
compareResult = comparator.compareTo(data);
|
compareResult = comparator.compareTo(data);
|
||||||
|
@ -179,10 +169,11 @@ public class ColumnValueFilter implements RowFilterInterface {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean filterRow(final SortedMap<byte[], Cell> columns) {
|
public boolean filterRow(final SortedMap<byte[], Cell> columns) {
|
||||||
|
if (columns == null)
|
||||||
|
return false;
|
||||||
if (filterIfColumnMissing) {
|
if (filterIfColumnMissing) {
|
||||||
return !columns.containsKey(columnName);
|
return !columns.containsKey(columnName);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise we must do the filter here
|
// Otherwise we must do the filter here
|
||||||
Cell colCell = columns.get(columnName);
|
Cell colCell = columns.get(columnName);
|
||||||
if (colCell == null) {
|
if (colCell == null) {
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2008 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.filter;
|
||||||
|
|
||||||
|
import java.io.DataInput;
|
||||||
|
import java.io.DataOutput;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This comparator is for use with ColumnValueFilter, for filtering based on
|
||||||
|
* the value of a given column. Use it to test if a given regular expression
|
||||||
|
* matches a cell value in the column.
|
||||||
|
* <p>
|
||||||
|
* Only EQUAL or NOT_EQUAL tests are valid with this comparator.
|
||||||
|
* <p>
|
||||||
|
* For example:
|
||||||
|
* <p>
|
||||||
|
* <pre>
|
||||||
|
* ColumnValueFilter cvf =
|
||||||
|
* new ColumnValueFilter("col",
|
||||||
|
* ColumnValueFilter.CompareOp.EQUAL,
|
||||||
|
* new RegexStringComparator(
|
||||||
|
* // v4 IP address
|
||||||
|
* "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
|
||||||
|
* "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
|
||||||
|
* "|" +
|
||||||
|
* // v6 IP address
|
||||||
|
* "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
|
||||||
|
* "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
public class RegexStringComparator implements WritableByteArrayComparable {
|
||||||
|
|
||||||
|
private Pattern pattern;
|
||||||
|
|
||||||
|
/** Nullary constructor for Writable */
|
||||||
|
public RegexStringComparator() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
* @param expr a valid regular expression
|
||||||
|
*/
|
||||||
|
public RegexStringComparator(String expr) {
|
||||||
|
this.pattern = Pattern.compile(expr);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(byte[] value) {
|
||||||
|
// Use find() for subsequence match instead of matches() (full sequence
|
||||||
|
// match) to adhere to the principle of least surprise.
|
||||||
|
return pattern.matcher(Bytes.toString(value)).find() ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void readFields(DataInput in) throws IOException {
|
||||||
|
this.pattern = Pattern.compile(in.readUTF());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(DataOutput out) throws IOException {
|
||||||
|
out.writeUTF(pattern.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,74 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2008 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.filter;
|
||||||
|
|
||||||
|
import java.io.DataInput;
|
||||||
|
import java.io.DataOutput;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This comparator is for use with ColumnValueFilter, for filtering based on
|
||||||
|
* the value of a given column. Use it to test if a given substring appears
|
||||||
|
* in a cell value in the column. The comparison is case insensitive.
|
||||||
|
* <p>
|
||||||
|
* Only EQUAL or NOT_EQUAL tests are valid with this comparator.
|
||||||
|
* <p>
|
||||||
|
* For example:
|
||||||
|
* <p>
|
||||||
|
* <pre>
|
||||||
|
* ColumnValueFilter cvf =
|
||||||
|
* new ColumnValueFilter("col", ColumnValueFilter.CompareOp.EQUAL,
|
||||||
|
* new SubstringComparator("substr"));
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
public class SubstringComparator implements WritableByteArrayComparable {
|
||||||
|
|
||||||
|
private String substr;
|
||||||
|
|
||||||
|
/** Nullary constructor for Writable */
|
||||||
|
public SubstringComparator() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
* @param substr the substring
|
||||||
|
*/
|
||||||
|
public SubstringComparator(String substr) {
|
||||||
|
this.substr = substr.toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(byte[] value) {
|
||||||
|
return Bytes.toString(value).toLowerCase().contains(substr) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void readFields(DataInput in) throws IOException {
|
||||||
|
substr = in.readUTF();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(DataOutput out) throws IOException {
|
||||||
|
out.writeUTF(substr);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,143 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2007 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.filter;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.DataInputStream;
|
||||||
|
import java.io.DataOutputStream;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the stop row filter
|
||||||
|
*/
|
||||||
|
public class TestColumnValueFilter extends TestCase {
|
||||||
|
|
||||||
|
private static final byte[] ROW = Bytes.toBytes("test");
|
||||||
|
private static final byte[] COLUMN = Bytes.toBytes("test:foo");
|
||||||
|
private static final byte[] VAL_1 = Bytes.toBytes("a");
|
||||||
|
private static final byte[] VAL_2 = Bytes.toBytes("ab");
|
||||||
|
private static final byte[] VAL_3 = Bytes.toBytes("abc");
|
||||||
|
private static final byte[] VAL_4 = Bytes.toBytes("abcd");
|
||||||
|
private static final byte[] FULLSTRING_1 =
|
||||||
|
Bytes.toBytes("The quick brown fox jumps over the lazy dog.");
|
||||||
|
private static final byte[] FULLSTRING_2 =
|
||||||
|
Bytes.toBytes("The slow grey fox trips over the lazy dog.");
|
||||||
|
private static final String QUICK_SUBSTR = "quick";
|
||||||
|
private static final String QUICK_REGEX = "[q][u][i][c][k]";
|
||||||
|
|
||||||
|
private RowFilterInterface basicFilterNew() {
|
||||||
|
return new ColumnValueFilter(COLUMN,
|
||||||
|
ColumnValueFilter.CompareOp.GREATER_OR_EQUAL, VAL_2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private RowFilterInterface substrFilterNew() {
|
||||||
|
return new ColumnValueFilter(COLUMN, ColumnValueFilter.CompareOp.EQUAL,
|
||||||
|
new SubstringComparator(QUICK_SUBSTR));
|
||||||
|
}
|
||||||
|
|
||||||
|
private RowFilterInterface regexFilterNew() {
|
||||||
|
return new ColumnValueFilter(COLUMN, ColumnValueFilter.CompareOp.EQUAL,
|
||||||
|
new RegexStringComparator(QUICK_REGEX));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void basicFilterTests(RowFilterInterface filter)
|
||||||
|
throws Exception {
|
||||||
|
assertTrue("basicFilter1", filter.filterColumn(ROW, COLUMN, VAL_1));
|
||||||
|
assertFalse("basicFilter2", filter.filterColumn(ROW, COLUMN, VAL_2));
|
||||||
|
assertFalse("basicFilter3", filter.filterColumn(ROW, COLUMN, VAL_3));
|
||||||
|
assertFalse("basicFilter4", filter.filterColumn(ROW, COLUMN, VAL_4));
|
||||||
|
assertFalse("basicFilterAllRemaining", filter.filterAllRemaining());
|
||||||
|
assertFalse("basicFilterNotNull", filter.filterRow(null));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void substrFilterTests(RowFilterInterface filter)
|
||||||
|
throws Exception {
|
||||||
|
assertTrue("substrTrue", filter.filterColumn(ROW, COLUMN, FULLSTRING_1));
|
||||||
|
assertFalse("substrFalse", filter.filterColumn(ROW, COLUMN, FULLSTRING_2));
|
||||||
|
assertFalse("substrFilterAllRemaining", filter.filterAllRemaining());
|
||||||
|
assertFalse("substrFilterNotNull", filter.filterRow(null));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void regexFilterTests(RowFilterInterface filter)
|
||||||
|
throws Exception {
|
||||||
|
assertTrue("regexTrue", filter.filterColumn(ROW, COLUMN, FULLSTRING_1));
|
||||||
|
assertFalse("regexFalse", filter.filterColumn(ROW, COLUMN, FULLSTRING_2));
|
||||||
|
assertFalse("regexFilterAllRemaining", filter.filterAllRemaining());
|
||||||
|
assertFalse("regexFilterNotNull", filter.filterRow(null));
|
||||||
|
}
|
||||||
|
|
||||||
|
private RowFilterInterface serializationTest(RowFilterInterface filter)
|
||||||
|
throws Exception {
|
||||||
|
// Decompose filter to bytes.
|
||||||
|
ByteArrayOutputStream stream = new ByteArrayOutputStream();
|
||||||
|
DataOutputStream out = new DataOutputStream(stream);
|
||||||
|
filter.write(out);
|
||||||
|
out.close();
|
||||||
|
byte[] buffer = stream.toByteArray();
|
||||||
|
|
||||||
|
// Recompose filter.
|
||||||
|
DataInputStream in =
|
||||||
|
new DataInputStream(new ByteArrayInputStream(buffer));
|
||||||
|
RowFilterInterface newFilter = new ColumnValueFilter();
|
||||||
|
newFilter.readFields(in);
|
||||||
|
|
||||||
|
return newFilter;
|
||||||
|
}
|
||||||
|
|
||||||
|
RowFilterInterface basicFilter;
|
||||||
|
RowFilterInterface substrFilter;
|
||||||
|
RowFilterInterface regexFilter;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
basicFilter = basicFilterNew();
|
||||||
|
substrFilter = substrFilterNew();
|
||||||
|
regexFilter = regexFilterNew();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests identification of the stop row
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public void testStop() throws Exception {
|
||||||
|
basicFilterTests(basicFilter);
|
||||||
|
substrFilterTests(substrFilter);
|
||||||
|
regexFilterTests(regexFilter);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests serialization
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public void testSerialization() throws Exception {
|
||||||
|
RowFilterInterface newFilter = serializationTest(basicFilter);
|
||||||
|
basicFilterTests(newFilter);
|
||||||
|
newFilter = serializationTest(substrFilter);
|
||||||
|
substrFilterTests(newFilter);
|
||||||
|
newFilter = serializationTest(regexFilter);
|
||||||
|
regexFilterTests(newFilter);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue