diff --git a/CHANGES.txt b/CHANGES.txt index 516887e61e7..8e34e33223c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -76,6 +76,7 @@ Release 0.19.0 - Unreleased TRUNK/0.19.0. HBASE-1003 If cell exceeds TTL but not VERSIONs, will not be removed during major compaction + HBASE-1005 Regex and string comparison operators for ColumnValueFilter IMPROVEMENTS HBASE-901 Add a limit to key length, check key and value length on client side diff --git a/src/java/org/apache/hadoop/hbase/filter/ColumnValueFilter.java b/src/java/org/apache/hadoop/hbase/filter/ColumnValueFilter.java index 575368ce4fe..07b1bc74401 100644 --- a/src/java/org/apache/hadoop/hbase/filter/ColumnValueFilter.java +++ b/src/java/org/apache/hadoop/hbase/filter/ColumnValueFilter.java @@ -53,11 +53,7 @@ public class ColumnValueFilter implements RowFilterInterface { /** greater than or equal to */ GREATER_OR_EQUAL, /** greater than */ - GREATER, - // Below are more specific operators. - /** sub-string. Case insensitive. */ - SUB_STRING; - + GREATER; } private byte[] columnName; @@ -143,12 +139,6 @@ public class ColumnValueFilter implements RowFilterInterface { } private boolean filterColumnValue(final byte [] data) { - // Special case for Substring operator - if (compareOp == CompareOp.SUB_STRING) { - return !Bytes.toString(data).toLowerCase().contains( - (Bytes.toString(value)).toLowerCase()); - } - int compareResult; if (comparator != null) { compareResult = comparator.compareTo(data); @@ -179,10 +169,11 @@ public class ColumnValueFilter implements RowFilterInterface { } public boolean filterRow(final SortedMap columns) { + if (columns == null) + return false; if (filterIfColumnMissing) { return !columns.containsKey(columnName); } - // Otherwise we must do the filter here Cell colCell = columns.get(columnName); if (colCell == null) { diff --git a/src/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java b/src/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java new file mode 100644 index 
00000000000..c3658741834 --- /dev/null +++ b/src/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java @@ -0,0 +1,85 @@ +/** + * Copyright 2008 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.regex.Pattern; + +import org.apache.hadoop.hbase.util.Bytes; + +/** + * This comparator is for use with ColumnValueFilter, for filtering based on + * the value of a given column. Use it to test if a given regular expression + * matches a cell value in the column. + *

+ * Only EQUAL or NOT_EQUAL tests are valid with this comparator. + * <p>

+ * For example: + * <p>

+ * <pre>

+ * ColumnValueFilter cvf =
+ *   new ColumnValueFilter("col",
+ *     ColumnValueFilter.CompareOp.EQUAL,
+ *     new RegexStringComparator(
+ *       // v4 IP address
+ *       "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
+ *         "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
+ *         "|" +
+ *       // v6 IP address
+ *       "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
+ *         "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
+ * </pre>
+ */ +public class RegexStringComparator implements WritableByteArrayComparable { + + private Pattern pattern; + + /** Nullary constructor for Writable */ + public RegexStringComparator() { + } + + /** + * Constructor + * @param expr a valid regular expression + */ + public RegexStringComparator(String expr) { + this.pattern = Pattern.compile(expr); + } + + @Override + public int compareTo(byte[] value) { + // Use find() for subsequence match instead of matches() (full sequence + // match) to adhere to the principle of least surprise. + return pattern.matcher(Bytes.toString(value)).find() ? 1 : 0; + } + + @Override + public void readFields(DataInput in) throws IOException { + this.pattern = Pattern.compile(in.readUTF()); + } + + @Override + public void write(DataOutput out) throws IOException { + out.writeUTF(pattern.toString()); + } + +} diff --git a/src/java/org/apache/hadoop/hbase/filter/SubstringComparator.java b/src/java/org/apache/hadoop/hbase/filter/SubstringComparator.java new file mode 100644 index 00000000000..6358ded84fd --- /dev/null +++ b/src/java/org/apache/hadoop/hbase/filter/SubstringComparator.java @@ -0,0 +1,74 @@ +/** + * Copyright 2008 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.hbase.util.Bytes; + +/** + * This comparator is for use with ColumnValueFilter, for filtering based on + * the value of a given column. Use it to test if a given substring appears + * in a cell value in the column. The comparison is case insensitive. + *

+ * Only EQUAL or NOT_EQUAL tests are valid with this comparator. + * <p>

+ * For example: + * <p>

+ * <pre>

+ * ColumnValueFilter cvf =
+ *   new ColumnValueFilter("col", ColumnValueFilter.CompareOp.EQUAL,
+ *     new SubstringComparator("substr"));
+ * </pre>
+ */ +public class SubstringComparator implements WritableByteArrayComparable { + + private String substr; + + /** Nullary constructor for Writable */ + public SubstringComparator() { + } + + /** + * Constructor + * @param substr the substring + */ + public SubstringComparator(String substr) { + this.substr = substr.toLowerCase(); + } + + @Override + public int compareTo(byte[] value) { + return Bytes.toString(value).toLowerCase().contains(substr) ? 1 : 0; + } + + @Override + public void readFields(DataInput in) throws IOException { + substr = in.readUTF(); + } + + @Override + public void write(DataOutput out) throws IOException { + out.writeUTF(substr); + } + +} diff --git a/src/test/org/apache/hadoop/hbase/filter/TestColumnValueFilter.java b/src/test/org/apache/hadoop/hbase/filter/TestColumnValueFilter.java new file mode 100755 index 00000000000..c50c9ec9313 --- /dev/null +++ b/src/test/org/apache/hadoop/hbase/filter/TestColumnValueFilter.java @@ -0,0 +1,143 @@ +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.filter; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; + +import org.apache.hadoop.hbase.util.Bytes; + +import junit.framework.TestCase; + +/** + * Tests the stop row filter + */ +public class TestColumnValueFilter extends TestCase { + + private static final byte[] ROW = Bytes.toBytes("test"); + private static final byte[] COLUMN = Bytes.toBytes("test:foo"); + private static final byte[] VAL_1 = Bytes.toBytes("a"); + private static final byte[] VAL_2 = Bytes.toBytes("ab"); + private static final byte[] VAL_3 = Bytes.toBytes("abc"); + private static final byte[] VAL_4 = Bytes.toBytes("abcd"); + private static final byte[] FULLSTRING_1 = + Bytes.toBytes("The quick brown fox jumps over the lazy dog."); + private static final byte[] FULLSTRING_2 = + Bytes.toBytes("The slow grey fox trips over the lazy dog."); + private static final String QUICK_SUBSTR = "quick"; + private static final String QUICK_REGEX = "[q][u][i][c][k]"; + + private RowFilterInterface basicFilterNew() { + return new ColumnValueFilter(COLUMN, + ColumnValueFilter.CompareOp.GREATER_OR_EQUAL, VAL_2); + } + + private RowFilterInterface substrFilterNew() { + return new ColumnValueFilter(COLUMN, ColumnValueFilter.CompareOp.EQUAL, + new SubstringComparator(QUICK_SUBSTR)); + } + + private RowFilterInterface regexFilterNew() { + return new ColumnValueFilter(COLUMN, ColumnValueFilter.CompareOp.EQUAL, + new RegexStringComparator(QUICK_REGEX)); + } + + private void basicFilterTests(RowFilterInterface filter) + throws Exception { + assertTrue("basicFilter1", filter.filterColumn(ROW, COLUMN, VAL_1)); + assertFalse("basicFilter2", filter.filterColumn(ROW, COLUMN, VAL_2)); + assertFalse("basicFilter3", filter.filterColumn(ROW, COLUMN, VAL_3)); + assertFalse("basicFilter4", filter.filterColumn(ROW, COLUMN, VAL_4)); + assertFalse("basicFilterAllRemaining", 
filter.filterAllRemaining()); + assertFalse("basicFilterNotNull", filter.filterRow(null)); + } + + private void substrFilterTests(RowFilterInterface filter) + throws Exception { + assertTrue("substrTrue", filter.filterColumn(ROW, COLUMN, FULLSTRING_1)); + assertFalse("substrFalse", filter.filterColumn(ROW, COLUMN, FULLSTRING_2)); + assertFalse("substrFilterAllRemaining", filter.filterAllRemaining()); + assertFalse("substrFilterNotNull", filter.filterRow(null)); + } + + private void regexFilterTests(RowFilterInterface filter) + throws Exception { + assertTrue("regexTrue", filter.filterColumn(ROW, COLUMN, FULLSTRING_1)); + assertFalse("regexFalse", filter.filterColumn(ROW, COLUMN, FULLSTRING_2)); + assertFalse("regexFilterAllRemaining", filter.filterAllRemaining()); + assertFalse("regexFilterNotNull", filter.filterRow(null)); + } + + private RowFilterInterface serializationTest(RowFilterInterface filter) + throws Exception { + // Decompose filter to bytes. + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(stream); + filter.write(out); + out.close(); + byte[] buffer = stream.toByteArray(); + + // Recompose filter. 
+ DataInputStream in = + new DataInputStream(new ByteArrayInputStream(buffer)); + RowFilterInterface newFilter = new ColumnValueFilter(); + newFilter.readFields(in); + + return newFilter; + } + + RowFilterInterface basicFilter; + RowFilterInterface substrFilter; + RowFilterInterface regexFilter; + + @Override + protected void setUp() throws Exception { + super.setUp(); + basicFilter = basicFilterNew(); + substrFilter = substrFilterNew(); + regexFilter = regexFilterNew(); + } + + /** + * Tests identification of the stop row + * @throws Exception + */ + public void testStop() throws Exception { + basicFilterTests(basicFilter); + substrFilterTests(substrFilter); + regexFilterTests(regexFilter); + } + + /** + * Tests serialization + * @throws Exception + */ + public void testSerialization() throws Exception { + RowFilterInterface newFilter = serializationTest(basicFilter); + basicFilterTests(newFilter); + newFilter = serializationTest(substrFilter); + substrFilterTests(newFilter); + newFilter = serializationTest(regexFilter); + regexFilterTests(newFilter); + } + +}