HADOOP-1531 Add RowFilter to HRegion.HScanner.

Adds a row/column filter interface and two implementations: a pager and a row/column-value regex filter.

M src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java
M src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInterface.java
    (openScanner): Add override that specifies a row filter.
M src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java
    (obtainScanner): Add override that specifies a row filter.
    (ClientScanner): Add filter parameter to constructor.
M src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java
    (getScanner): Add override with filter parameter.
    (next): Add handling of filtering.
A src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/InvalidRowFilterException.java
A src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java
A src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java
A src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java
A src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java
    Row-filter interface, exception and implementations.
A src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRegExpRowFilter.java
A src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestPageRowFilter.java
    Simple pager and regex filter tests.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@553620 13f79535-47bb-0310-9956-ffa450edef68
parent 9eb369c266 · commit 73183e6b97
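In outline, the patch lets a caller hand a RowFilterInterface to HClient.obtainScanner; the filter is serialized with the openScanner RPC and applied inside HRegion.HScanner.next, so filtered rows are skipped on the region server before results cross the wire. A minimal usage sketch of the new client API follows — a sketch only: the table name, column name, and regexp are invented, and openTable plus the HScannerInterface next/close calls are assumed to behave as elsewhere in HClient at this revision.

import java.util.TreeMap;

import org.apache.hadoop.hbase.HClient;
import org.apache.hadoop.hbase.HScannerInterface;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.filter.RegExpRowFilter;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.io.Text;

public class RowFilterUsageSketch {
  public static void scanFilteredRows(final HClient client) throws Exception {
    client.openTable(new Text("webtable"));        // hypothetical table
    Text[] columns = { new Text("info:host") };    // hypothetical column

    // Only rows whose key matches the regexp pass the filter.
    RowFilterInterface filter = new RegExpRowFilter("org\\.apache\\..*");

    HScannerInterface scanner =
      client.obtainScanner(columns, new Text(""), filter);
    try {
      HStoreKey key = new HStoreKey();
      TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
      while (scanner.next(key, results)) {
        System.out.println(key.getRow() + ": " + results.size() + " cells");
        results.clear();
      }
    } finally {
      scanner.close();
    }
  }
}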
src/contrib/hbase/CHANGES.txt
@@ -38,11 +38,15 @@ Trunk (unreleased changes)
 23. HADOOP-1509. Made methods/inner classes in HRegionServer and HClient protected
     instead of private for easier extension. Also made HRegion and HRegionInfo public too.
     Added an hbase-default.xml property for specifying what HRegionInterface extension to use
-    for proxy server connection.
+    for proxy server connection. (James Kennedy via Jim Kellerman)
 24. HADOOP-1534. [hbase] Memcache scanner fails if start key not present
 25. HADOOP-1537. Catch exceptions in testCleanRegionServerExit so we can see
     what is failing.
 26. HADOOP-1543 [hbase] Add HClient.tableExists
-27. HADOOP-1519 [hbase] map/reduce interface for HBase
+27. HADOOP-1519 [hbase] map/reduce interface for HBase. (Vuk Ercegovac and
+    Jim Kellerman)
 28. HADOOP-1523 Hung region server waiting on write locks
 29. HADOOP-1560 NPE in MiniHBaseCluster on Windows
+30. HADOOP-1531 Add RowFilter to HRegion.HScanner
+    Adds a row filtering interface and two implementations: A page scanner,
+    and a regex row/column-data matcher. (James Kennedy via Stack)
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java
@@ -30,6 +30,7 @@ import java.util.concurrent.TimeUnit;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.filter.RowFilterInterface;
 import org.apache.hadoop.hbase.io.KeyedData;
 import org.apache.hadoop.io.DataInputBuffer;
 import org.apache.hadoop.io.Text;
@@ -1173,18 +1174,34 @@ public class HClient implements HConstants {
    * Get a scanner on the current table starting at the specified row.
    * Return the specified columns.
    *
-   * @param columns - array of columns to return
-   * @param startRow - starting row in table to scan
-   * @return - scanner
+   * @param columns array of columns to return
+   * @param startRow starting row in table to scan
+   * @return scanner
    * @throws IOException
    */
   public synchronized HScannerInterface obtainScanner(Text[] columns,
-      Text startRow) throws IOException {
+      Text startRow)
+  throws IOException {
+    return obtainScanner(columns, startRow, null);
+  }
+
+  /**
+   * Get a scanner on the current table starting at the specified row.
+   * Return the specified columns.
+   *
+   * @param columns array of columns to return
+   * @param startRow starting row in table to scan
+   * @param filter a row filter using row-key regexp and/or column data filter.
+   * @return scanner
+   * @throws IOException
+   */
+  public synchronized HScannerInterface obtainScanner(Text[] columns,
+      Text startRow, RowFilterInterface filter)
+  throws IOException {
     if(this.tableServers == null) {
       throw new IllegalStateException("Must open table first");
     }
-    return new ClientScanner(columns, startRow);
+    return new ClientScanner(columns, startRow, filter);
   }
 
   /*
@@ -1388,6 +1405,7 @@ public class HClient implements HConstants {
     private int currentRegion;
     private HRegionInterface server;
     private long scannerId;
+    private RowFilterInterface filter;
 
     private void loadRegions() {
       Text firstServer = null;
@@ -1404,11 +1422,15 @@ public class HClient implements HConstants {
       this.regions = info.toArray(new RegionLocation[info.size()]);
     }
 
-    ClientScanner(Text[] columns, Text startRow) throws IOException {
+    ClientScanner(Text[] columns, Text startRow, RowFilterInterface filter)
+    throws IOException {
       this.columns = columns;
       this.startRow = startRow;
       this.closed = false;
+
+      this.filter = filter;
+      if (filter != null) {
+        filter.validate(columns);
+      }
       loadRegions();
       this.currentRegion = -1;
       this.server = null;
@@ -1437,9 +1459,16 @@ public class HClient implements HConstants {
       RegionLocation info = this.regions[currentRegion];
 
       try {
-        this.scannerId = this.server.openScanner(info.regionInfo.regionName,
-          this.columns, currentRegion == 0 ? this.startRow : EMPTY_START_ROW);
-
+        if (this.filter == null) {
+          this.scannerId = this.server.openScanner(info.regionInfo.regionName,
+            this.columns, currentRegion == 0 ? this.startRow
+                : EMPTY_START_ROW);
+        } else {
+          this.scannerId = this.server.openScanner(info.regionInfo.regionName,
+            this.columns, currentRegion == 0 ? this.startRow
+                : EMPTY_START_ROW, filter);
+        }
         break;
 
       } catch(NotServingRegionException e) {
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java
@@ -15,14 +15,26 @@
  */
 package org.apache.hadoop.hbase;
 
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Random;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.Vector;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.io.*;
-import org.apache.hadoop.fs.*;
-import org.apache.hadoop.conf.*;
-
-import java.io.*;
-import java.util.*;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.filter.RowFilterInterface;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.Text;
 
 /**
  * HRegion stores data for a certain region of a table. It stores all columns
@@ -974,6 +986,15 @@ public class HRegion implements HConstants {
    */
   public HInternalScannerInterface getScanner(Text[] cols, Text firstRow)
     throws IOException {
+    return getScanner(cols, firstRow, null);
+  }
+
+  /**
+   * Return an iterator that scans over the HRegion, returning the indicated
+   * columns for only the rows that match the data filter. This Iterator must be closed by the caller.
+   */
+  public HInternalScannerInterface getScanner(Text[] cols, Text firstRow, RowFilterInterface filter)
+    throws IOException {
     lock.obtainReadLock();
     try {
       TreeSet<Text> families = new TreeSet<Text>();
@@ -986,7 +1007,7 @@ public class HRegion implements HConstants {
       for (Text family: families) {
         storelist[i++] = stores.get(family);
       }
-      return new HScanner(cols, firstRow, memcache, storelist);
+      return new HScanner(cols, firstRow, memcache, storelist, filter);
     } finally {
       lock.releaseReadLock();
     }
@@ -1262,12 +1283,17 @@ public class HRegion implements HConstants {
     private HStoreKey[] keys;
     private boolean wildcardMatch;
     private boolean multipleMatchers;
+    private RowFilterInterface dataFilter;
 
     /** Create an HScanner with a handle on many HStores. */
     @SuppressWarnings("unchecked")
-    HScanner(Text[] cols, Text firstRow, HMemcache memcache, HStore[] stores)
+    HScanner(Text[] cols, Text firstRow, HMemcache memcache, HStore[] stores, RowFilterInterface filter)
     throws IOException {
       long scanTime = System.currentTimeMillis();
+      this.dataFilter = filter;
+      if (null != dataFilter) {
+        dataFilter.reset();
+      }
       this.scanners = new HInternalScannerInterface[stores.length + 1];
       for(int i = 0; i < this.scanners.length; i++) {
         this.scanners[i] = null;
@@ -1335,84 +1361,135 @@ public class HRegion implements HConstants {
       return multipleMatchers;
     }
 
-    /* (non-Javadoc)
-     * Grab the next row's worth of values. The HScanner will return the most
-     * recent data value for each row that is not newer than the target time.
-     *
-     * @see org.apache.hadoop.hbase.HInternalScannerInterface#next(org.apache.hadoop.hbase.HStoreKey, java.util.TreeMap)
-     */
-    public boolean next(HStoreKey key, TreeMap<Text, byte []> results)
-    throws IOException {
-      // Find the lowest-possible key.
-      Text chosenRow = null;
-      long chosenTimestamp = -1;
-      for(int i = 0; i < this.keys.length; i++) {
-        if(scanners[i] != null
-            && (chosenRow == null
-            || (keys[i].getRow().compareTo(chosenRow) < 0)
-            || ((keys[i].getRow().compareTo(chosenRow) == 0)
-            && (keys[i].getTimestamp() > chosenTimestamp)))) {
-          chosenRow = new Text(keys[i].getRow());
-          chosenTimestamp = keys[i].getTimestamp();
-        }
-      }
-
-      // Store the key and results for each sub-scanner. Merge them as appropriate.
-      boolean insertedItem = false;
-      if(chosenTimestamp > 0) {
-        key.setRow(chosenRow);
-        key.setVersion(chosenTimestamp);
-        key.setColumn(new Text(""));
-
-        for(int i = 0; i < scanners.length; i++) {
-          while((scanners[i] != null)
-              && (keys[i].getRow().compareTo(chosenRow) == 0)) {
-            // If we are doing a wild card match or there are multiple matchers
-            // per column, we need to scan all the older versions of this row
-            // to pick up the rest of the family members
-            if(!wildcardMatch
-                && !multipleMatchers
-                && (keys[i].getTimestamp() != chosenTimestamp)) {
-              break;
-            }
-
-            // NOTE: We used to do results.putAll(resultSets[i]);
-            // but this had the effect of overwriting newer
-            // values with older ones. So now we only insert
-            // a result if the map does not contain the key.
-            for(Map.Entry<Text, byte []> e: resultSets[i].entrySet()) {
-              if(!results.containsKey(e.getKey())) {
-                results.put(e.getKey(), e.getValue());
-                insertedItem = true;
-              }
-            }
-
-            resultSets[i].clear();
-            if(! scanners[i].next(keys[i], resultSets[i])) {
-              closeScanner(i);
-            }
-          }
-
-          // If the current scanner is non-null AND has a lower-or-equal
-          // row label, then its timestamp is bad. We need to advance it.
-          while((scanners[i] != null)
-              && (keys[i].getRow().compareTo(chosenRow) <= 0)) {
-            resultSets[i].clear();
-            if(! scanners[i].next(keys[i], resultSets[i])) {
-              closeScanner(i);
-            }
-          }
-        }
-      }
-      return insertedItem;
+    /*
+     * (non-Javadoc)
+     *
+     * Grab the next row's worth of values. The HScanner will return the most
+     * recent data value for each row that is not newer than the target time.
+     *
+     * If a dataFilter is defined, it will be used to skip rows that do not
+     * match its criteria. It may cause the scanner to stop prematurely if it
+     * knows that it will no longer accept the remaining results.
+     *
+     * @see org.apache.hadoop.hbase.HInternalScannerInterface#next(org.apache.hadoop.hbase.HStoreKey,
+     * java.util.TreeMap)
+     */
+    public boolean next(HStoreKey key, TreeMap<Text, byte[]> results)
+    throws IOException {
+      boolean filtered = true;
+      boolean moreToFollow = true;
+      while (filtered && moreToFollow) {
+        // Find the lowest-possible key.
+        Text chosenRow = null;
+        long chosenTimestamp = -1;
+        for (int i = 0; i < this.keys.length; i++) {
+          if (scanners[i] != null &&
+              (chosenRow == null ||
+              (keys[i].getRow().compareTo(chosenRow) < 0) ||
+              ((keys[i].getRow().compareTo(chosenRow) == 0) &&
+              (keys[i].getTimestamp() > chosenTimestamp)))) {
+            chosenRow = new Text(keys[i].getRow());
+            chosenTimestamp = keys[i].getTimestamp();
+          }
+        }
+
+        // Filter whole row by row key?
+        filtered = dataFilter != null? dataFilter.filter(chosenRow) : false;
+
+        // Store the key and results for each sub-scanner. Merge them as
+        // appropriate.
+        if (chosenTimestamp > 0 && !filtered) {
+          key.setRow(chosenRow);
+          key.setVersion(chosenTimestamp);
+          key.setColumn(new Text(""));
+
+          for (int i = 0; i < scanners.length && !filtered; i++) {
+
+            while ((scanners[i] != null
+                && !filtered
+                && moreToFollow)
+                && (keys[i].getRow().compareTo(chosenRow) == 0)) {
+              // If we are doing a wild card match or there are multiple
+              // matchers per column, we need to scan all the older versions of
+              // this row to pick up the rest of the family members
+              if (!wildcardMatch
+                  && !multipleMatchers
+                  && (keys[i].getTimestamp() != chosenTimestamp)) {
+                break;
+              }
+
+              // Filter out null criteria columns that are not null
+              if (dataFilter != null) {
+                filtered = dataFilter.filterNotNull(resultSets[i]);
+              }
+
+              // NOTE: We used to do results.putAll(resultSets[i]);
+              // but this had the effect of overwriting newer
+              // values with older ones. So now we only insert
+              // a result if the map does not contain the key.
+              for (Map.Entry<Text, byte[]> e : resultSets[i].entrySet()) {
+                if (!filtered && moreToFollow &&
+                    !results.containsKey(e.getKey())) {
+                  if (dataFilter != null) {
+                    // Filter whole row by column data?
+                    filtered =
+                        dataFilter.filter(chosenRow, e.getKey(), e.getValue());
+                    if (filtered) {
+                      results.clear();
+                      break;
+                    }
+                  }
+                  results.put(e.getKey(), e.getValue());
+                }
+              }
+
+              resultSets[i].clear();
+              if (!scanners[i].next(keys[i], resultSets[i])) {
+                closeScanner(i);
+              }
+            }
+
+            // If the current scanner is non-null AND has a lower-or-equal
+            // row label, then its timestamp is bad. We need to advance it.
+            while ((scanners[i] != null) &&
+                (keys[i].getRow().compareTo(chosenRow) <= 0)) {
+              resultSets[i].clear();
+              if (!scanners[i].next(keys[i], resultSets[i])) {
+                closeScanner(i);
+              }
+            }
+          }
+        }
+
+        moreToFollow = chosenTimestamp > 0;
+
+        if (dataFilter != null) {
+          if (moreToFollow && !filtered) {
+            dataFilter.acceptedRow(chosenRow);
+          }
+          if (dataFilter.filterAllRemaining()) {
+            moreToFollow = false;
+            LOG.debug("page limit");
+          }
+        }
+      }
+
+      // Make sure scanners closed if no more results
+      if (!moreToFollow) {
+        for (int i = 0; i < scanners.length; i++) {
+          if (null != scanners[i]) {
+            closeScanner(i);
+          }
+        }
+      }
+
+      return moreToFollow;
     }
 
 
     /** Shut down a single scanner */
     void closeScanner(int i) {
       try {
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInterface.java
@@ -17,6 +17,7 @@ package org.apache.hadoop.hbase;
 
 import java.io.IOException;
 
+import org.apache.hadoop.hbase.filter.RowFilterInterface;
 import org.apache.hadoop.hbase.io.KeyedData;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.ipc.VersionedProtocol;
@@ -201,6 +202,20 @@ public interface HRegionInterface extends VersionedProtocol {
    */
   public long openScanner(Text regionName, Text[] columns, Text startRow)
     throws IOException;
 
+  /**
+   * Opens a remote scanner with a RowFilter.
+   *
+   * @param regionName name of region to scan
+   * @param columns columns to scan
+   * @param startRow starting row to scan
+   * @param filter RowFilter for filtering results at the row-level.
+   *
+   * @return scannerId scanner identifier used in other calls
+   * @throws IOException
+   */
+  public long openScanner(Text regionName, Text[] columns, Text startRow, RowFilterInterface filter)
+    throws IOException;
+
   /**
    * Get the next set of values
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java
@@ -33,6 +33,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.filter.RowFilterInterface;
 import org.apache.hadoop.hbase.io.KeyedData;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.retry.RetryProxy;
@@ -1150,15 +1151,25 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
       }
     }
 
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.hbase.HRegionInterface#openScanner(org.apache.hadoop.io.Text, org.apache.hadoop.io.Text[], org.apache.hadoop.io.Text)
+  /**
+   * {@inheritDoc}
    */
-  public long openScanner(Text regionName, Text[] cols, Text firstRow)
+  public long openScanner(final Text regionName, final Text[] cols,
+      final Text firstRow)
   throws IOException{
+    return openScanner(regionName, cols, firstRow, null);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  public long openScanner(Text regionName, Text[] cols, Text firstRow,
+      final RowFilterInterface filter)
+  throws IOException {
     HRegion r = getRegion(regionName);
     long scannerId = -1L;
     try {
-      HInternalScannerInterface s = r.getScanner(cols, firstRow);
+      HInternalScannerInterface s = r.getScanner(cols, firstRow, filter);
       scannerId = rand.nextLong();
       String scannerName = String.valueOf(scannerId);
       synchronized(scanners) {
src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/InvalidRowFilterException.java (new file)
@@ -0,0 +1,31 @@
/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

/**
 * Used to indicate an invalid RowFilter.
 */
public class InvalidRowFilterException extends RuntimeException {
  private static final long serialVersionUID = 2667894046345657865L;

  public InvalidRowFilterException() {
    super();
  }

  public InvalidRowFilterException(String s) {
    super(s);
  }
}
src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java (new file)
@@ -0,0 +1,137 @@
/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.io.Text;

/**
 * Implementation of RowFilterInterface that limits results to a specific page
 * size. It terminates scanning once the number of filter-passed results is >=
 * the given page size.
 *
 * <p>
 * Note that this filter cannot guarantee that the number of results returned
 * to a client are <= page size. This is because the filter is applied
 * separately on different region servers. It does however optimize the scan of
 * individual HRegions by making sure that the page size is never exceeded
 * locally.
 * </p>
 */
public class PageRowFilter implements RowFilterInterface {

  private long pageSize = Long.MAX_VALUE;
  private int rowsAccepted = 0;

  /**
   * Default constructor, filters nothing. Required though for RPC
   * deserialization.
   */
  public PageRowFilter() {
    super();
  }

  /**
   * Constructor that takes a maximum page size.
   *
   * @param pageSize Maximum result size.
   */
  public PageRowFilter(final long pageSize) {
    this.pageSize = pageSize;
  }

  /**
   * {@inheritDoc}
   */
  public void validate(@SuppressWarnings("unused") final Text[] columns) {
    // Doesn't filter columns
  }

  /**
   * {@inheritDoc}
   */
  public void reset() {
    rowsAccepted = 0;
  }

  /**
   * {@inheritDoc}
   */
  public void acceptedRow(@SuppressWarnings("unused") final Text key) {
    rowsAccepted++;
  }

  /**
   * {@inheritDoc}
   */
  public boolean filterAllRemaining() {
    if (this.rowsAccepted > this.pageSize) {
      return true;
    }
    return false;
  }

  /**
   * {@inheritDoc}
   */
  public boolean filter(@SuppressWarnings("unused") final Text rowKey) {
    return filterAllRemaining();
  }

  /**
   * {@inheritDoc}
   */
  public boolean filter(@SuppressWarnings("unused") final Text rowKey,
      @SuppressWarnings("unused") final Text colKey,
      @SuppressWarnings("unused") final byte[] data) {
    return filterAllRemaining();
  }

  /**
   * {@inheritDoc}
   */
  public boolean filterNotNull(@SuppressWarnings("unused")
      final TreeMap<Text, byte[]> columns) {
    return filterAllRemaining();
  }

  /**
   * {@inheritDoc}
   */
  public void readFields(final DataInput in) throws IOException {
    this.pageSize = in.readLong();
  }

  /**
   * {@inheritDoc}
   */
  public void write(final DataOutput out) throws IOException {
    out.writeLong(pageSize);
  }
}
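The protocol a PageRowFilter relies on is driven by HScanner: reset once per scan, filter per row, acceptedRow for each row that survives all filtering, and filterAllRemaining as the stop check. A standalone driver sketch of that sequence (not code from the patch):

import org.apache.hadoop.hbase.filter.PageRowFilter;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.io.Text;

public class PageRowFilterSketch {
  public static void main(final String[] args) {
    RowFilterInterface pager = new PageRowFilter(2);
    pager.reset();
    for (int i = 0; i < 5 && !pager.filterAllRemaining(); i++) {
      Text row = new Text("row" + i);
      if (!pager.filter(row)) {
        // The scanner, not the filter, reports back each accepted row.
        pager.acceptedRow(row);
        System.out.println("accepted " + row);
      }
    }
    // Prints row0..row2: rowsAccepted must exceed pageSize before
    // filterAllRemaining trips, so one extra row slips through per region.
  }
}

That off-by-one comes on top of the caveat in the class comment: the count is kept per region server, so a client scanning several regions must still enforce the global page size itself.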
src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java (new file)
@@ -0,0 +1,300 @@
/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.Map.Entry;
import java.util.regex.Pattern;

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.io.Text;

/**
 * Implementation of RowFilterInterface that can filter by rowkey regular
 * expression and/or individual column values (equals comparison only).
 * Multiple column filters imply an implicit conjunction of filter criteria.
 */
public class RegExpRowFilter implements RowFilterInterface {

  private Pattern rowKeyPattern = null;
  private String rowKeyRegExp = null;

  private Map<Text, byte[]> equalsMap = new HashMap<Text, byte[]>();
  private Set<Text> nullColumns = new HashSet<Text>();

  /**
   * Default constructor, filters nothing. Required though for RPC
   * deserialization.
   */
  public RegExpRowFilter() {
    super();
  }

  /**
   * Constructor that takes a row key regular expression to filter on.
   *
   * @param rowKeyRegExp
   */
  public RegExpRowFilter(final String rowKeyRegExp) {
    this.rowKeyRegExp = rowKeyRegExp;
  }

  /**
   * Constructor that takes a row key regular expression to filter on and a
   * map of column value criteria.
   *
   * @param rowKeyRegExp
   * @param columnFilter
   */
  public RegExpRowFilter(final String rowKeyRegExp,
      final Map<Text, byte[]> columnFilter) {
    this.rowKeyRegExp = rowKeyRegExp;
    this.setColumnFilters(columnFilter);
  }

  /**
   * {@inheritDoc}
   */
  public void acceptedRow(@SuppressWarnings("unused") final Text key) {
    // Doesn't care.
  }

  /**
   * Specify a value that must be matched for the given column.
   *
   * @param colKey the column to match on
   * @param value the value that must equal the stored value.
   */
  public void setColumnFilter(final Text colKey, final byte[] value) {
    if (null == value) {
      nullColumns.add(colKey);
    } else {
      equalsMap.put(colKey, value);
    }
  }

  /**
   * Set column filters for a number of columns.
   *
   * @param columnFilter Map of columns with value criteria.
   */
  public void setColumnFilters(final Map<Text, byte[]> columnFilter) {
    if (null == columnFilter) {
      nullColumns.clear();
      equalsMap.clear();
    } else {
      for (Entry<Text, byte[]> entry : columnFilter.entrySet()) {
        setColumnFilter(entry.getKey(), entry.getValue());
      }
    }
  }

  /**
   * {@inheritDoc}
   */
  public void reset() {
    // Nothing to reset
  }

  /**
   * {@inheritDoc}
   */
  public boolean filterAllRemaining() {
    return false;
  }

  /**
   * {@inheritDoc}
   */
  public boolean filter(final Text rowKey) {
    if (filtersByRowKey() && rowKey != null) {
      return !getRowKeyPattern().matcher(rowKey.toString()).matches();
    }
    return false;
  }

  /**
   * {@inheritDoc}
   */
  public boolean filter(final Text rowKey, final Text colKey,
      final byte[] data) {
    if (filter(rowKey)) {
      return true;
    }
    if (filtersByColumnValue()) {
      byte[] filterValue = equalsMap.get(colKey);
      if (null != filterValue) {
        return !Arrays.equals(filterValue, data);
      }
    }
    if (nullColumns.contains(colKey)) {
      if (data != null && !Arrays.equals(HConstants.DELETE_BYTES.get(), data)) {
        return true;
      }
    }
    return false;
  }

  /**
   * {@inheritDoc}
   */
  public boolean filterNotNull(final TreeMap<Text, byte[]> columns) {
    for (Entry<Text, byte[]> col : columns.entrySet()) {
      if (nullColumns.contains(col.getKey())
          && !Arrays.equals(HConstants.DELETE_BYTES.get(), col.getValue())) {
        return true;
      }
    }
    for (Text col : equalsMap.keySet()) {
      if (!columns.containsKey(col)) {
        return true;
      }
    }
    return false;
  }

  private boolean filtersByColumnValue() {
    return equalsMap != null && equalsMap.size() > 0;
  }

  private boolean filtersByRowKey() {
    return null != rowKeyPattern || null != rowKeyRegExp;
  }

  private String getRowKeyRegExp() {
    if (null == rowKeyRegExp && rowKeyPattern != null) {
      rowKeyRegExp = rowKeyPattern.toString();
    }
    return rowKeyRegExp;
  }

  private Pattern getRowKeyPattern() {
    if (rowKeyPattern == null && rowKeyRegExp != null) {
      rowKeyPattern = Pattern.compile(rowKeyRegExp);
    }
    return rowKeyPattern;
  }

  /**
   * {@inheritDoc}
   */
  public void readFields(final DataInput in) throws IOException {
    boolean hasRowKeyPattern = in.readBoolean();
    if (hasRowKeyPattern) {
      rowKeyRegExp = in.readLine();
    }
    // equals map
    equalsMap.clear();
    int size = in.readInt();
    for (int i = 0; i < size; i++) {
      Text key = new Text();
      key.readFields(in);
      int len = in.readInt();
      byte[] value = null;
      if (len >= 0) {
        value = new byte[len];
        in.readFully(value);
      }
      setColumnFilter(key, value);
    }
    // nullColumns
    nullColumns.clear();
    size = in.readInt();
    for (int i = 0; i < size; i++) {
      Text key = new Text();
      key.readFields(in);
      setColumnFilter(key, null);
    }
  }

  /**
   * {@inheritDoc}
   */
  public void validate(final Text[] columns) {
    Set<Text> invalids = new HashSet<Text>();
    for (Text colKey : getFilterColumns()) {
      boolean found = false;
      for (Text col : columns) {
        if (col.equals(colKey)) {
          found = true;
          break;
        }
      }
      if (!found) {
        invalids.add(colKey);
      }
    }

    if (invalids.size() > 0) {
      throw new InvalidRowFilterException(String.format(
          "RowFilter contains criteria on columns %s not in %s", invalids,
          Arrays.toString(columns)));
    }
  }

  private Set<Text> getFilterColumns() {
    Set<Text> cols = new HashSet<Text>();
    cols.addAll(equalsMap.keySet());
    cols.addAll(nullColumns);
    return cols;
  }

  /**
   * {@inheritDoc}
   */
  public void write(final DataOutput out) throws IOException {
    if (!filtersByRowKey()) {
      out.writeBoolean(false);
    } else {
      out.writeBoolean(true);
      out.writeChars(getRowKeyRegExp());
    }

    // equalsMap
    out.writeInt(equalsMap.size());
    for (Entry<Text, byte[]> entry : equalsMap.entrySet()) {
      entry.getKey().write(out);
      byte[] value = entry.getValue();
      out.writeInt(value.length);
      out.write(value);
    }

    // null columns
    out.writeInt(nullColumns.size());
    for (Text col : nullColumns) {
      col.write(out);
    }
  }
}
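Combining the two kinds of column criteria with a key regexp, a short sketch (column names are invented; a "null" criterion means the column must be absent or carry the deleted-cell marker):

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hbase.filter.RegExpRowFilter;
import org.apache.hadoop.io.Text;

public class RegExpRowFilterSketch {
  public static void main(final String[] args) throws Exception {
    Map<Text, byte[]> criteria = new HashMap<Text, byte[]>();
    criteria.put(new Text("info:lang"), "java".getBytes("UTF-8")); // must equal
    criteria.put(new Text("info:gone"), null);                     // must be null

    RegExpRowFilter filter =
      new RegExpRowFilter("org\\.apache\\..*", criteria);

    // true: the key fails the regexp, so the row is filtered out.
    System.out.println(filter.filter(new Text("com.example")));
    // false: key matches and the column value equals the criterion.
    System.out.println(filter.filter(new Text("org.apache.hbase"),
      new Text("info:lang"), "java".getBytes("UTF-8")));
  }
}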
src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java (new file)
@@ -0,0 +1,106 @@
/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.util.TreeMap;

import org.apache.hadoop.hbase.HRegion;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

/**
 * Interface used for row-level filters applied to HRegion.HScanner scan
 * results during calls to next().
 */
public interface RowFilterInterface extends Writable {

  /**
   * Resets the state of the filter. Used prior to the start of a Region scan.
   */
  void reset();

  /**
   * Called to let the filter know that the specified row has been included in
   * the results (passed all filtering). Without HScanner calling this, the
   * filter does not know if a row passed filtering even if it passed the row
   * itself, because other filters may have failed the row. E.g. when this
   * filter is a member of a RowFilterSet with an OR operator.
   *
   * @see RowFilterSet
   * @param key
   */
  void acceptedRow(final Text key);

  /**
   * Determines if the filter has decided that all remaining results should be
   * filtered (skipped). This is used to prevent the scanner from scanning the
   * rest of the HRegion when it is certain that the filter will exclude all
   * remaining rows.
   *
   * @return true if the filter intends to filter all remaining rows.
   */
  boolean filterAllRemaining();

  /**
   * Filters on just a row key.
   *
   * @param rowKey
   * @return true if given row key is filtered and row should not be processed.
   */
  boolean filter(final Text rowKey);

  /**
   * Filters on row key and/or a column key.
   *
   * @param rowKey row key to filter on. May be null for no filtering of row key.
   * @param colKey column whose data will be filtered
   * @param data column value
   * @return true if row filtered and should not be processed.
   */
  boolean filter(final Text rowKey, final Text colKey, final byte[] data);

  /**
   * Filters a row if the given columns are non-null and have null criteria, or
   * if there exists criteria on columns not included in the column set. A
   * column is considered null if it:
   * <ul>
   * <li>Is not included in the given columns.</li>
   * <li>Has a value of HConstants.DELETE_BYTES.</li>
   * </ul>
   *
   * @param columns
   * @return true if null/non-null criteria not met.
   */
  boolean filterNotNull(final TreeMap<Text, byte[]> columns);

  /**
   * Validates that this filter applies only to a subset of the given columns.
   * This check is done prior to opening of the scanner due to the limitation
   * that filtering of columns is dependent on the retrieval of those columns
   * within the HRegion. Criteria on columns that are not part of a scanner's
   * column list will be ignored. In the case of null value filters, all rows
   * will pass the filter. This behavior should be 'undefined' for the user and
   * is therefore not permitted.
   *
   * @param columns
   */
  void validate(final Text[] columns);
}
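To make the contract concrete, here is a hypothetical minimal implementation (not part of this patch) that passes only rows whose key starts with a fixed prefix; the Writable half is what lets a filter travel inside the openScanner RPC:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.io.Text;

/** Hypothetical example: passes only rows whose key starts with a prefix. */
public class PrefixRowFilter implements RowFilterInterface {
  private Text prefix = new Text();

  /** Required for RPC deserialization. */
  public PrefixRowFilter() {
    super();
  }

  public PrefixRowFilter(final Text prefix) {
    this.prefix = prefix;
  }

  public void reset() {
    // No per-scan state.
  }

  public void acceptedRow(final Text key) {
    // No bookkeeping needed.
  }

  public boolean filterAllRemaining() {
    return false;
  }

  public boolean filter(final Text rowKey) {
    return !rowKey.toString().startsWith(prefix.toString());
  }

  public boolean filter(final Text rowKey, final Text colKey,
      final byte[] data) {
    return filter(rowKey); // Ignores column data.
  }

  public boolean filterNotNull(final TreeMap<Text, byte[]> columns) {
    return false; // No null-column criteria.
  }

  public void validate(final Text[] columns) {
    // No column criteria to check against the scanner's column list.
  }

  public void readFields(final DataInput in) throws IOException {
    prefix.readFields(in);
  }

  public void write(final DataOutput out) throws IOException {
    prefix.write(out);
  }
}

Note that RowFilterSet.readFields below reinstantiates child filters via Class.forName, so a custom filter like this must also be on the region server's classpath.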
src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java (new file)
@@ -0,0 +1,230 @@
/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeMap;

import org.apache.hadoop.io.Text;

/**
 * Implementation of RowFilterInterface that represents a set of RowFilters
 * which will be evaluated with a specified boolean operator AND/OR. Since you
 * can use RowFilterSets as children of RowFilterSet, you can create a
 * hierarchy of filters to be evaluated.
 */
public class RowFilterSet implements RowFilterInterface {

  enum Operator {
    AND, OR
  }

  private Operator operator = Operator.AND;
  private Set<RowFilterInterface> filters = new HashSet<RowFilterInterface>();

  /**
   * Default constructor, filters nothing. Required though for RPC
   * deserialization.
   */
  public RowFilterSet() {
    super();
  }

  /**
   * Constructor that takes a set of RowFilters. The default operator AND is
   * assumed.
   *
   * @param rowFilters
   */
  public RowFilterSet(final Set<RowFilterInterface> rowFilters) {
    this.filters = rowFilters;
  }

  /**
   * Constructor that takes a set of RowFilters and an operator.
   *
   * @param operator Operator to process filter set with.
   * @param rowFilters Set of row filters.
   */
  public RowFilterSet(final Operator operator,
      final Set<RowFilterInterface> rowFilters) {
    this.filters = rowFilters;
    this.operator = operator;
  }

  /**
   * {@inheritDoc}
   */
  public void validate(final Text[] columns) {
    for (RowFilterInterface filter : filters) {
      filter.validate(columns);
    }
  }

  /**
   * {@inheritDoc}
   */
  public void reset() {
    for (RowFilterInterface filter : filters) {
      filter.reset();
    }
  }

  /**
   * {@inheritDoc}
   */
  public void acceptedRow(final Text key) {
    for (RowFilterInterface filter : filters) {
      filter.acceptedRow(key);
    }
  }

  /**
   * {@inheritDoc}
   */
  public boolean filterAllRemaining() {
    boolean result = operator == Operator.OR;
    for (RowFilterInterface filter : filters) {
      if (operator == Operator.AND) {
        if (filter.filterAllRemaining()) {
          return true;
        }
      } else if (operator == Operator.OR) {
        if (!filter.filterAllRemaining()) {
          return false;
        }
      }
    }
    return result;
  }

  /**
   * {@inheritDoc}
   */
  public boolean filter(final Text rowKey) {
    boolean result = operator == Operator.OR;
    for (RowFilterInterface filter : filters) {
      if (operator == Operator.AND) {
        if (filter.filterAllRemaining() || filter.filter(rowKey)) {
          return true;
        }
      } else if (operator == Operator.OR) {
        if (!filter.filterAllRemaining() && !filter.filter(rowKey)) {
          return false;
        }
      }
    }
    return result;
  }

  /**
   * {@inheritDoc}
   */
  public boolean filter(final Text rowKey, final Text colKey,
      final byte[] data) {
    boolean result = operator == Operator.OR;
    for (RowFilterInterface filter : filters) {
      if (operator == Operator.AND) {
        if (filter.filterAllRemaining() ||
            filter.filter(rowKey, colKey, data)) {
          return true;
        }
      } else if (operator == Operator.OR) {
        if (!filter.filterAllRemaining()
            && !filter.filter(rowKey, colKey, data)) {
          return false;
        }
      }
    }
    return result;
  }

  /**
   * {@inheritDoc}
   */
  public boolean filterNotNull(final TreeMap<Text, byte[]> columns) {
    boolean result = operator == Operator.OR;
    for (RowFilterInterface filter : filters) {
      if (operator == Operator.AND) {
        if (filter.filterAllRemaining() || filter.filterNotNull(columns)) {
          return true;
        }
      } else if (operator == Operator.OR) {
        if (!filter.filterAllRemaining() && !filter.filterNotNull(columns)) {
          return false;
        }
      }
    }
    return result;
  }

  /**
   * {@inheritDoc}
   */
  public void readFields(final DataInput in) throws IOException {
    byte opByte = in.readByte();
    operator = Operator.values()[opByte];
    int size = in.readInt();
    if (size > 0) {
      filters = new HashSet<RowFilterInterface>();
      try {
        for (int i = 0; i < size; i++) {
          String className = in.readUTF();
          Class<?> clazz = Class.forName(className);
          RowFilterInterface filter =
              (RowFilterInterface) clazz.newInstance();
          filter.readFields(in);
          filters.add(filter);
        }
      } catch (InstantiationException e) {
        throw new RuntimeException("Failed to deserialize RowFilterInterface.", e);
      } catch (IllegalAccessException e) {
        throw new RuntimeException("Failed to deserialize RowFilterInterface.", e);
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("Failed to deserialize RowFilterInterface.", e);
      }
    }
  }

  /**
   * {@inheritDoc}
   */
  public void write(final DataOutput out) throws IOException {
    out.writeByte(operator.ordinal());
    out.writeInt(filters.size());
    for (RowFilterInterface filter : filters) {
      out.writeUTF(filter.getClass().getName());
      filter.write(out);
    }
  }
}
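A composition sketch using only the public surface shown above (note that, as committed, the Operator enum is package-private, so code outside org.apache.hadoop.hbase.filter can only reach the default AND behavior through the one-argument constructor):

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.hbase.filter.PageRowFilter;
import org.apache.hadoop.hbase.filter.RegExpRowFilter;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.hbase.filter.RowFilterSet;

public class RowFilterSetSketch {
  public static RowFilterInterface buildFilter() {
    // AND of two criteria: rows must match the key regexp, and each
    // region stops once a page's worth of rows has been accepted.
    Set<RowFilterInterface> filters = new HashSet<RowFilterInterface>();
    filters.add(new RegExpRowFilter("org\\.apache\\..*"));
    filters.add(new PageRowFilter(25));
    return new RowFilterSet(filters);
  }
}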
src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestPageRowFilter.java (new file)
@@ -0,0 +1,48 @@
/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import org.apache.hadoop.io.Text;

import junit.framework.TestCase;

public class TestPageRowFilter extends TestCase {

  public void testPageSize() throws Exception {
    final int pageSize = 3;
    RowFilterInterface filter = new PageRowFilter(pageSize);
    testFiltersBeyondPageSize(filter, pageSize);
    // Test reset works by going in again.
    filter.reset();
    testFiltersBeyondPageSize(filter, pageSize);
  }

  private void testFiltersBeyondPageSize(final RowFilterInterface filter,
      final int pageSize) {
    for (int i = 0; i < (pageSize * 2); i++) {
      Text row = new Text(Integer.toString(i));
      boolean filterOut = filter.filter(row);
      if (!filterOut) {
        assertFalse("Disagrees with 'filter'", filter.filterAllRemaining());
        filter.acceptedRow(row);
      } else {
        // Once we have all for a page, calls to filterAllRemaining should
        // stay true.
        assertTrue("Disagrees with 'filter'", filter.filterAllRemaining());
        assertTrue(i >= pageSize);
      }
    }
  }
}
src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRegExpRowFilter.java (new file)
@@ -0,0 +1,81 @@
/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.util.Map;
import java.util.TreeMap;

import junit.framework.TestCase;

import org.apache.hadoop.io.Text;

public class TestRegExpRowFilter extends TestCase {
  TreeMap<Text, byte []> colvalues;
  RowFilterInterface filter;
  final char FIRST_CHAR = 'a';
  final char LAST_CHAR = 'e';
  byte [] GOOD_BYTES = "abc".getBytes();
  final String HOST_PREFIX = "org.apache.site-";

  @Override
  protected void setUp() throws Exception {
    super.setUp();
    this.colvalues = new TreeMap<Text, byte[]>();
    for (char c = FIRST_CHAR; c < LAST_CHAR; c++) {
      colvalues.put(new Text(new String(new char [] {c})), GOOD_BYTES);
    }
    this.filter = new RegExpRowFilter(HOST_PREFIX + ".*", colvalues);
  }

  public void testRegexOnRow() throws Exception {
    for (char c = FIRST_CHAR; c <= LAST_CHAR; c++) {
      Text t = createRow(c);
      assertFalse("Failed with character " + c, filter.filter(t));
    }
    String yahooSite = "com.yahoo.www";
    assertTrue("Failed with row " +
      yahooSite, filter.filter(new Text(yahooSite)));
  }

  public void testRegexOnRowAndColumn() throws Exception {
    for (char c = FIRST_CHAR; c <= LAST_CHAR; c++) {
      Text t = createRow(c);
      for (Map.Entry<Text, byte []> e: this.colvalues.entrySet()) {
        assertFalse("Failed on " + c,
          this.filter.filter(t, e.getKey(), e.getValue()));
      }
    }
    // Try a row and column I know will pass.
    char c = 'c';
    Text r = createRow(c);
    Text col = new Text(Character.toString(c));
    assertFalse("Failed with character " + c,
      filter.filter(r, col, GOOD_BYTES));
    // Do same but with bad bytes.
    assertTrue("Failed with character " + c,
      filter.filter(r, col, "badbytes".getBytes()));
    // Do with good bytes but bad column name. Should not filter out.
    assertFalse("Failed with character " + c,
      filter.filter(r, new Text("badcolumn"), GOOD_BYTES));
    // Good column, good bytes but bad row.
    assertTrue("Failed with character " + c,
      filter.filter(new Text("bad row"), new Text("badcolumn"), GOOD_BYTES));
  }

  private Text createRow(final char c) {
    return new Text(HOST_PREFIX + Character.toString(c));
  }
}