HADOOP-1637 Fix to HScanner to Support Filters, Add Filter Tests to TestScanner2


git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@558897 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2007-07-23 23:33:05 +00:00
parent 377bf72458
commit 43e253359a
3 changed files with 140 additions and 22 deletions

View File

@ -71,3 +71,5 @@ Trunk (unreleased changes)
(Izaak Rubin via Stack) (Izaak Rubin via Stack)
46. HADOOP-1579 Add new WhileMatchRowFilter and StopRowFilter filters 46. HADOOP-1579 Add new WhileMatchRowFilter and StopRowFilter filters
(Izaak Rubin via Stack) (Izaak Rubin via Stack)
47. HADOOP-1637 Fix to HScanner to Support Filters, Add Filter Tests to
TestScanner2 (Izaak Rubin via Stack)

View File

@ -1339,34 +1339,34 @@ public class HRegion implements HConstants {
try { try {
HInternalScannerInterface scanner = HInternalScannerInterface scanner =
memcache.getScanner(timestamp, cols, firstRow); memcache.getScanner(timestamp, cols, firstRow);
if(scanner.isWildcardScanner()) { if (scanner.isWildcardScanner()) {
this.wildcardMatch = true; this.wildcardMatch = true;
} }
if(scanner.isMultipleMatchScanner()) { if (scanner.isMultipleMatchScanner()) {
this.multipleMatchers = true; this.multipleMatchers = true;
} }
scanners[0] = scanner; scanners[0] = scanner;
for(int i = 0; i < stores.length; i++) { for (int i = 0; i < stores.length; i++) {
scanner = stores[i].getScanner(timestamp, cols, firstRow); scanner = stores[i].getScanner(timestamp, cols, firstRow);
if(scanner.isWildcardScanner()) { if (scanner.isWildcardScanner()) {
this.wildcardMatch = true; this.wildcardMatch = true;
} }
if(scanner.isMultipleMatchScanner()) { if (scanner.isMultipleMatchScanner()) {
this.multipleMatchers = true; this.multipleMatchers = true;
} }
scanners[i + 1] = scanner; scanners[i + 1] = scanner;
} }
} catch(IOException e) { } catch(IOException e) {
for(int i = 0; i < this.scanners.length; i++) { for (int i = 0; i < this.scanners.length; i++) {
if(scanners[i] != null) { if(scanners[i] != null) {
closeScanner(i); closeScanner(i);
} }
} }
throw e; throw e;
} }
for(int i = 0; i < scanners.length; i++) { for (int i = 0; i < scanners.length; i++) {
keys[i] = new HStoreKey(); keys[i] = new HStoreKey();
resultSets[i] = new TreeMap<Text, byte []>(); resultSets[i] = new TreeMap<Text, byte []>();
if(scanners[i] != null && !scanners[i].next(keys[i], resultSets[i])) { if(scanners[i] != null && !scanners[i].next(keys[i], resultSets[i])) {
@ -1428,9 +1428,8 @@ public class HRegion implements HConstants {
&& moreToFollow) && moreToFollow)
&& (keys[i].getRow().compareTo(chosenRow) == 0)) { && (keys[i].getRow().compareTo(chosenRow) == 0)) {
// If we are doing a wild card match or there are multiple // If we are doing a wild card match or there are multiple
// matchers // matchers per column, we need to scan all the older versions of
// per column, we need to scan all the older versions of this row // this row to pick up the rest of the family members
// to pick up the rest of the family members
if (!wildcardMatch if (!wildcardMatch
&& !multipleMatchers && !multipleMatchers
@ -1469,19 +1468,21 @@ public class HRegion implements HConstants {
closeScanner(i); closeScanner(i);
} }
} }
// If the current scanner is non-null AND has a lower-or-equal
// row label, then its timestamp is bad. We need to advance it.
while ((scanners[i] != null) &&
(keys[i].getRow().compareTo(chosenRow) <= 0)) {
resultSets[i].clear();
if (!scanners[i].next(keys[i], resultSets[i])) {
closeScanner(i);
}
}
} }
} }
for (int i = 0; i < scanners.length; i++) {
// If the current scanner is non-null AND has a lower-or-equal
// row label, then its timestamp is bad. We need to advance it.
while ((scanners[i] != null) &&
(keys[i].getRow().compareTo(chosenRow) <= 0)) {
resultSets[i].clear();
if (!scanners[i].next(keys[i], resultSets[i])) {
closeScanner(i);
}
}
}
moreToFollow = chosenTimestamp > 0; moreToFollow = chosenTimestamp > 0;
if (dataFilter != null) { if (dataFilter != null) {
@ -1492,7 +1493,10 @@ public class HRegion implements HConstants {
moreToFollow = false; moreToFollow = false;
LOG.debug("page limit"); LOG.debug("page limit");
} }
} }
if (LOG.isDebugEnabled()) {
LOG.debug("ROWKEY = " + chosenRow + ", FILTERED = " + filtered);
}
} }
// Make sure scanners closed if no more results // Make sure scanners closed if no more results
@ -1507,7 +1511,7 @@ public class HRegion implements HConstants {
return moreToFollow; return moreToFollow;
} }
/** Shut down a single scanner */ /** Shut down a single scanner */
void closeScanner(int i) { void closeScanner(int i) {
try { try {

View File

@ -21,12 +21,21 @@ package org.apache.hadoop.hbase;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.filter.RegExpRowFilter;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.hbase.filter.RowFilterSet;
import org.apache.hadoop.hbase.filter.StopRowFilter;
import org.apache.hadoop.hbase.filter.WhileMatchRowFilter;
import org.apache.hadoop.hbase.io.KeyedData; import org.apache.hadoop.hbase.io.KeyedData;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
@ -39,6 +48,109 @@ import org.apache.hadoop.io.Text;
public class TestScanner2 extends HBaseClusterTestCase { public class TestScanner2 extends HBaseClusterTestCase {
final Log LOG = LogFactory.getLog(this.getClass().getName()); final Log LOG = LogFactory.getLog(this.getClass().getName());
// Row keys: testScannerFilter writes one single-character row for every
// char in [FIRST_ROWKEY, LAST_ROWKEY].
final char FIRST_ROWKEY = 'a';
// Rows whose key lies in [FIRST_BAD_RANGE_ROWKEY, LAST_BAD_RANGE_ROWKEY]
// are written with BAD_BYTES in every column; all other rows get GOOD_BYTES.
final char FIRST_BAD_RANGE_ROWKEY = 'j';
final char LAST_BAD_RANGE_ROWKEY = 'q';
final char LAST_ROWKEY = 'z';
// Column keys: one "<char>:" key is created for every char in
// [FIRST_COLKEY, LAST_COLKEY].
final char FIRST_COLKEY = '0';
final char LAST_COLKEY = '3';
// Cell values written by the test.
// NOTE(review): getBytes() with no argument uses the platform default
// charset; harmless for these ASCII literals, but worth confirming if the
// values ever change.
final byte[] GOOD_BYTES = "goodstuff".getBytes();
final byte[] BAD_BYTES = "badstuff".getBytes();
/**
 * Test the scanner's handling of various filters.
 *
 * <p>Creates a table with one column key per character in
 * [FIRST_COLKEY, LAST_COLKEY], writes one row per character in
 * [FIRST_ROWKEY, LAST_ROWKEY] (rows inside the bad range receive BAD_BYTES
 * in every column, all other rows receive GOOD_BYTES), and then runs the
 * individual filter checks against that data.
 *
 * @throws Exception on any failure raised by the client calls or by the
 * individual filter checks
 */
public void testScannerFilter() throws Exception {
  // Setup HClient, ensure that it is running correctly
  HClient client = new HClient(this.conf);

  // Setup colkeys to be inserted
  HTableDescriptor htd = new HTableDescriptor(getName());
  Text tableName = new Text(getName());
  // char - char is already an int, so no cast is needed for the array size.
  Text[] colKeys = new Text[LAST_COLKEY - FIRST_COLKEY + 1];
  for (char i = 0; i < colKeys.length; i++) {
    colKeys[i] = new Text(new String(new char[] {
      (char)(FIRST_COLKEY + i), ':' }));
    htd.addFamily(new HColumnDescriptor(colKeys[i].toString()));
  }
  client.createTable(htd);
  // JUnit only reports the message when the assertion FAILS, so each message
  // has to describe the failure rather than the success.
  assertTrue("Table with name " + tableName + " was not created.",
    client.tableExists(tableName));
  assertTrue("Master is not running.", client.isMasterRunning());

  // Enter data
  client.openTable(tableName);
  for (char i = FIRST_ROWKEY; i <= LAST_ROWKEY; i++) {
    Text rowKey = new Text(new String(new char[] { i }));
    // Rows inside the bad range get the "bad" value in every column.
    boolean inBadRange =
      i >= FIRST_BAD_RANGE_ROWKEY && i <= LAST_BAD_RANGE_ROWKEY;
    long lockID = client.startUpdate(rowKey);
    for (char j = 0; j < colKeys.length; j++) {
      client.put(lockID, colKeys[j], inBadRange ? BAD_BYTES : GOOD_BYTES);
    }
    client.commit(lockID);
  }

  regExpFilterTest(client, colKeys);
  rowFilterSetTest(client, colKeys);
}
/**
 * Scans with a RegExpRowFilter built from the row-key regex "[^aeiou]" and
 * a criteria map that pairs every column key with GOOD_BYTES, then checks
 * that every row the scanner returns has a key matching "[^aei-qu]".
 *
 * @param client client used to obtain the scanner
 * @param colKeys columns to scan; each one is also given a GOOD_BYTES
 * criterion
 * @throws Exception on any failure raised by the client or scanner calls
 */
private void regExpFilterTest(HClient client, Text[] colKeys)
throws Exception {
  // Every scanned column is paired with the "good" value.
  // NOTE(review): presumably the filter rejects rows whose column values
  // differ from these criteria, which would explain why the expected
  // pattern below also excludes the BAD_BYTES rows j-q; confirm against
  // RegExpRowFilter.
  Map<Text, byte[]> requiredValues = new TreeMap<Text, byte[]>();
  for (Text colKey : colKeys) {
    requiredValues.put(colKey, GOOD_BYTES);
  }
  RowFilterInterface rowFilter =
    new RegExpRowFilter("[^aeiou]", requiredValues);

  // Open a filtered scanner starting at the first row key.
  Text startRow = new Text(String.valueOf(FIRST_ROWKEY));
  HScannerInterface filteredScanner =
    client.obtainScanner(colKeys, startRow, rowFilter);

  // Only keys outside the vowels and outside i-q are expected back.
  iterateOnScanner(filteredScanner, "[^aei-qu]");
}
/**
 * Scans with a MUST_PASS_ALL RowFilterSet that combines a RegExpRowFilter
 * on "[^aeiou]" with a WhileMatchRowFilter wrapping a StopRowFilter at row
 * "r", then checks that every row the scanner returns has a key matching
 * "[^aeior-z]".
 *
 * @param client client used to obtain the scanner
 * @param colKeys columns to scan
 * @throws Exception on any failure raised by the client or scanner calls
 */
private void rowFilterSetTest(HClient client, Text[] colKeys)
throws Exception {
  // Member filters: the row-key regex, and the stop-row filter wrapped in a
  // while-match filter.
  RowFilterInterface regexMember = new RegExpRowFilter("[^aeiou]");
  RowFilterInterface stopRowMember =
    new WhileMatchRowFilter(new StopRowFilter(new Text("r")));

  Set<RowFilterInterface> members = new HashSet<RowFilterInterface>();
  members.add(regexMember);
  members.add(stopRowMember);

  // A row must pass every member filter.
  RowFilterInterface combined =
    new RowFilterSet(RowFilterSet.Operator.MUST_PASS_ALL, members);

  // Open a filtered scanner starting at the first row key.
  Text startRow = new Text(String.valueOf(FIRST_ROWKEY));
  HScannerInterface filteredScanner =
    client.obtainScanner(colKeys, startRow, combined);

  // Only keys outside a, e, i, o and outside r-z are expected back.
  iterateOnScanner(filteredScanner, "[^aeior-z]");
}
/**
 * Drains the given scanner, asserting that every returned row key fully
 * matches {@code regexToMatch} and that at least one row was returned.
 * The scanner is closed before this method returns, whether or not an
 * assertion fails.
 *
 * @param scanner scanner to iterate over and then close
 * @param regexToMatch regular expression that each returned row key must
 * match in full
 * @throws Exception on any failure raised by the scanner
 */
private void iterateOnScanner(HScannerInterface scanner, String regexToMatch)
throws Exception {
  // A pattern that will only match rows that should not have been filtered.
  Pattern p = Pattern.compile(regexToMatch);

  try {
    // Use the scanner to ensure all results match the above pattern.
    HStoreKey rowKey = new HStoreKey();
    TreeMap<Text, byte[]> columns = new TreeMap<Text, byte[]>();
    int rowCount = 0;
    while (scanner.next(rowKey, columns)) {
      rowCount++;
      String key = rowKey.getRow().toString();
      assertTrue("Shouldn't have extracted '" + key + "'",
        p.matcher(key).matches());
    }
    // Without this check the loop above passes vacuously when the scanner
    // returns nothing, hiding a scanner or filter that drops every row.
    assertTrue("Scanner returned no rows; expected at least one row matching '"
      + regexToMatch + "'", rowCount > 0);
  } finally {
    scanner.close();
  }
}
/** /**
* Test scanning of META table around split. * Test scanning of META table around split.
* There was a problem where only one of the splits showed in a scan. * There was a problem where only one of the splits showed in a scan.