HBASE-2323 filter.RegexStringComparator does not work with certain bytes
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@923381 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
29bc8bb519
commit
11999bd2b3
|
@ -239,6 +239,8 @@ Release 0.21.0 - Unreleased
|
||||||
HBASE-2023 Client sync block can cause 1 thread of a multi-threaded client
|
HBASE-2023 Client sync block can cause 1 thread of a multi-threaded client
|
||||||
to block all others (Karthik Ranganathan via Stack)
|
to block all others (Karthik Ranganathan via Stack)
|
||||||
HBASE-2305 Client port for ZK has no default (Suraj Varma via Stack)
|
HBASE-2305 Client port for ZK has no default (Suraj Varma via Stack)
|
||||||
|
HBASE-2323 filter.RegexStringComparator does not work with certain bytes
|
||||||
|
(Benoit Sigoure via Stack)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HBASE-1760 Cleanup TODOs in HTable
|
HBASE-1760 Cleanup TODOs in HTable
|
||||||
|
|
|
@ -19,20 +19,26 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.filter;
|
package org.apache.hadoop.hbase.filter;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
import java.io.DataInput;
|
import java.io.DataInput;
|
||||||
import java.io.DataOutput;
|
import java.io.DataOutput;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.nio.charset.IllegalCharsetNameException;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This comparator is for use with {@link CompareFilter} implementations, such
|
* This comparator is for use with {@link CompareFilter} implementations, such
|
||||||
* as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for
|
* as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for
|
||||||
* filtering based on the value of a given column. Use it to test if a given
|
* filtering based on the value of a given column. Use it to test if a given
|
||||||
* regular expression matches a cell value in the column.
|
* regular expression matches a cell value in the column.
|
||||||
* <p>
|
* <p>
|
||||||
* Only EQUAL or NOT_EQUAL comparisons are valid with this comparator.
|
* Only EQUAL or NOT_EQUAL comparisons are valid with this comparator.
|
||||||
* <p>
|
* <p>
|
||||||
* For example:
|
* For example:
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -50,6 +56,10 @@ import java.util.regex.Pattern;
|
||||||
*/
|
*/
|
||||||
public class RegexStringComparator extends WritableByteArrayComparable {
|
public class RegexStringComparator extends WritableByteArrayComparable {
|
||||||
|
|
||||||
|
private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
|
||||||
|
|
||||||
|
private Charset charset = Charset.forName(HConstants.UTF8_ENCODING);
|
||||||
|
|
||||||
private Pattern pattern;
|
private Pattern pattern;
|
||||||
|
|
||||||
/** Nullary constructor for Writable, do not use */
|
/** Nullary constructor for Writable, do not use */
|
||||||
|
@ -61,26 +71,50 @@ public class RegexStringComparator extends WritableByteArrayComparable {
|
||||||
*/
|
*/
|
||||||
public RegexStringComparator(String expr) {
|
public RegexStringComparator(String expr) {
|
||||||
super(Bytes.toBytes(expr));
|
super(Bytes.toBytes(expr));
|
||||||
this.pattern = Pattern.compile(expr);
|
this.pattern = Pattern.compile(expr, Pattern.DOTALL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Specifies the {@link Charset} to use to convert the row key to a String.
|
||||||
|
* <p>
|
||||||
|
* The row key needs to be converted to a String in order to be matched
|
||||||
|
* against the regular expression. This method controls which charset is
|
||||||
|
* used to do this conversion.
|
||||||
|
* <p>
|
||||||
|
* If the row key is made of arbitrary bytes, the charset {@code ISO-8859-1}
|
||||||
|
* is recommended.
|
||||||
|
* @param charset The charset to use.
|
||||||
|
*/
|
||||||
|
public void setCharset(final Charset charset) {
|
||||||
|
this.charset = charset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareTo(byte[] value) {
|
public int compareTo(byte[] value) {
|
||||||
// Use find() for subsequence match instead of matches() (full sequence
|
// Use find() for subsequence match instead of matches() (full sequence
|
||||||
// match) to adhere to the principle of least surprise.
|
// match) to adhere to the principle of least surprise.
|
||||||
return pattern.matcher(Bytes.toString(value)).find() ? 0 : 1;
|
return pattern.matcher(new String(value, charset)).find() ? 0 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void readFields(DataInput in) throws IOException {
|
public void readFields(DataInput in) throws IOException {
|
||||||
String expr = in.readUTF();
|
final String expr = in.readUTF();
|
||||||
this.value = Bytes.toBytes(expr);
|
this.value = Bytes.toBytes(expr);
|
||||||
this.pattern = Pattern.compile(expr);
|
this.pattern = Pattern.compile(expr);
|
||||||
|
final String charset = in.readUTF();
|
||||||
|
if (charset.length() > 0) {
|
||||||
|
try {
|
||||||
|
this.charset = Charset.forName(charset);
|
||||||
|
} catch (IllegalCharsetNameException e) {
|
||||||
|
LOG.error("invalid charset", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void write(DataOutput out) throws IOException {
|
public void write(DataOutput out) throws IOException {
|
||||||
out.writeUTF(pattern.toString());
|
out.writeUTF(pattern.toString());
|
||||||
|
out.writeUTF(charset.name());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue