HBASE-3232 : Fix KeyOnlyFilter + Add Value Length (Nicolas via Ryan)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1035128 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan Rawson 2010-11-15 03:10:31 +00:00
parent ed06c534a0
commit 069fecaeb7
7 changed files with 100 additions and 45 deletions

View File

@ -679,6 +679,7 @@ Release 0.90.0 - Unreleased
HBASE-3221 Race between splitting and disabling HBASE-3221 Race between splitting and disabling
HBASE-3224 NPE in KeyValue$KVComparator.compare when compacting HBASE-3224 NPE in KeyValue$KVComparator.compare when compacting
HBASE-3233 Fix Long Running Stats HBASE-3233 Fix Long Running Stats
HBASE-3232 : Fix KeyOnlyFilter + Add Value Length (Nicolas via Ryan)
IMPROVEMENTS IMPROVEMENTS

View File

@ -1195,13 +1195,19 @@ public class KeyValue implements Writable, HeapSize {
* <p> * <p>
* This method is used by <code>KeyOnlyFilter</code> and is an advanced feature of * This method is used by <code>KeyOnlyFilter</code> and is an advanced feature of
* KeyValue, proceed with caution. * KeyValue, proceed with caution.
* @param lenAsVal replace value with the actual value length (false=empty)
*/ */
public void convertToKeyOnly() { public void convertToKeyOnly(boolean lenAsVal) {
// KV format: <keylen/4><valuelen/4><key/keylen><value/valuelen> // KV format: <keylen:4><valuelen:4><key:keylen><value:valuelen>
// Rebuild as: <keylen/4><0/4><key/keylen> // Rebuild as: <keylen:4><0:4><key:keylen>
byte [] newBuffer = new byte[getKeyLength() + (2 * Bytes.SIZEOF_INT)]; int dataLen = lenAsVal? Bytes.SIZEOF_INT : 0;
System.arraycopy(this.bytes, this.offset, newBuffer, 0, newBuffer.length); byte [] newBuffer = new byte[getKeyLength() + (2 * Bytes.SIZEOF_INT) + dataLen];
Bytes.putInt(newBuffer, Bytes.SIZEOF_INT, 0); System.arraycopy(this.bytes, this.offset, newBuffer, 0,
Math.min(newBuffer.length,this.length));
Bytes.putInt(newBuffer, Bytes.SIZEOF_INT, dataLen);
if (lenAsVal) {
Bytes.putInt(newBuffer, newBuffer.length - dataLen, this.getValueLength());
}
this.bytes = newBuffer; this.bytes = newBuffer;
this.offset = 0; this.offset = 0;
this.length = newBuffer.length; this.length = newBuffer.length;

View File

@ -23,6 +23,7 @@ import java.io.DataInput;
import java.io.DataOutput; import java.io.DataOutput;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
/** /**
* A filter that will only return the key component of each KV (the value will * A filter that will only return the key component of each KV (the value will
@ -33,15 +34,21 @@ import org.apache.hadoop.hbase.KeyValue;
*/ */
public class KeyOnlyFilter extends FilterBase { public class KeyOnlyFilter extends FilterBase {
public KeyOnlyFilter() {} boolean lenAsVal;
public KeyOnlyFilter() { this(false); }
public KeyOnlyFilter(boolean lenAsVal) { this.lenAsVal = lenAsVal; }
@Override @Override
public ReturnCode filterKeyValue(KeyValue kv) { public ReturnCode filterKeyValue(KeyValue kv) {
kv.convertToKeyOnly(); kv.convertToKeyOnly(this.lenAsVal);
return ReturnCode.INCLUDE; return ReturnCode.INCLUDE;
} }
public void write(DataOutput out) throws IOException {} public void write(DataOutput out) throws IOException {
out.writeBoolean(this.lenAsVal);
}
public void readFields(DataInput in) throws IOException {} public void readFields(DataInput in) throws IOException {
this.lenAsVal = in.readBoolean();
}
} }

View File

@ -252,12 +252,14 @@ class StoreScanner implements KeyValueScanner, InternalScanner, ChangedReadersOb
KeyValue kv; KeyValue kv;
List<KeyValue> results = new ArrayList<KeyValue>(); List<KeyValue> results = new ArrayList<KeyValue>();
LOOP: while((kv = this.heap.peek()) != null) { LOOP: while((kv = this.heap.peek()) != null) {
ScanQueryMatcher.MatchCode qcode = matcher.match(kv); // kv is no longer immutable due to KeyOnlyFilter! use copy for safety
KeyValue copyKv = new KeyValue(kv.getBuffer(), kv.getOffset(), kv.getLength());
ScanQueryMatcher.MatchCode qcode = matcher.match(copyKv);
//DebugPrint.println("SS peek kv = " + kv + " with qcode = " + qcode); //DebugPrint.println("SS peek kv = " + kv + " with qcode = " + qcode);
switch(qcode) { switch(qcode) {
case INCLUDE: case INCLUDE:
KeyValue next = this.heap.next(); results.add(copyKv);
results.add(next); this.heap.next();
if (limit > 0 && (results.size() == limit)) { if (limit > 0 && (results.size() == limit)) {
break LOOP; break LOOP;
} }

View File

@ -328,28 +328,20 @@ public class TestKeyValue extends TestCase {
byte [] value = Bytes.toBytes("a real value"); byte [] value = Bytes.toBytes("a real value");
byte [] evalue = new byte[0]; // empty value byte [] evalue = new byte[0]; // empty value
// verify key with a non-empty value works for (byte[] val : new byte[][]{value, evalue}) {
KeyValue kv1 = new KeyValue(rowA, family, qualA, ts, value); for (boolean useLen : new boolean[]{false,true}) {
KeyValue kv1ko = kv1.clone(); KeyValue kv1 = new KeyValue(rowA, family, qualA, ts, val);
assertTrue(kv1.equals(kv1ko)); KeyValue kv1ko = kv1.clone();
kv1ko.convertToKeyOnly(); assertTrue(kv1.equals(kv1ko));
// keys are still the same kv1ko.convertToKeyOnly(useLen);
assertTrue(kv1.equals(kv1ko)); // keys are still the same
// but values are not assertTrue(kv1.equals(kv1ko));
assertTrue(kv1.getValue().length != 0); // but values are not
assertTrue(kv1ko.getValue().length == 0); assertTrue(kv1ko.getValue().length == (useLen?Bytes.SIZEOF_INT:0));
if (useLen) {
// verify key with an already-empty value works assertEquals(kv1.getValueLength(), Bytes.toInt(kv1ko.getValue()));
KeyValue kv2 = new KeyValue(rowA, family, qualA, ts, evalue); }
KeyValue kv2ko = kv2.clone(); }
assertTrue(kv2.equals(kv2ko)); }
kv2ko.convertToKeyOnly();
// they should still be equal
assertTrue(kv2.equals(kv2ko));
// but they should have different underlying byte arrays
assertFalse(kv2.getBuffer() == kv2ko.getBuffer());
// both with 0 length values
assertTrue(kv2.getValue().length == 0);
assertTrue(kv2ko.getValue().length == 0);
} }
} }

View File

@ -53,6 +53,7 @@ import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
import org.apache.hadoop.hbase.filter.PrefixFilter; import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.QualifierFilter; import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RegexStringComparator;
@ -479,6 +480,39 @@ public class TestFromClientSide {
scanner.close(); scanner.close();
} }
/**
 * Verifies that a scan using <code>KeyOnlyFilter(true)</code> returns exactly
 * one KeyValue per row, and that the value of each returned KeyValue is a
 * <code>Bytes.SIZEOF_INT</code>-byte int holding the length of the value that
 * was originally written, rather than the value itself.
 */
@Test
public void testKeyOnlyFilter() throws Exception {
  byte [] TABLE = Bytes.toBytes("testKeyOnlyFilter");
  HTable ht = TEST_UTIL.createTable(TABLE, FAMILY);
  byte [][] ROWS = makeN(ROW, 10);
  byte [][] QUALIFIERS = {
    Bytes.toBytes("col0-<d2v1>-<d3v2>"), Bytes.toBytes("col1-<d2v1>-<d3v2>"),
    Bytes.toBytes("col2-<d2v1>-<d3v2>"), Bytes.toBytes("col3-<d2v1>-<d3v2>"),
    Bytes.toBytes("col4-<d2v1>-<d3v2>"), Bytes.toBytes("col5-<d2v1>-<d3v2>"),
    Bytes.toBytes("col6-<d2v1>-<d3v2>"), Bytes.toBytes("col7-<d2v1>-<d3v2>"),
    Bytes.toBytes("col8-<d2v1>-<d3v2>"), Bytes.toBytes("col9-<d2v1>-<d3v2>")
  };
  // One row per qualifier, each holding a single cell with VALUE.
  for (int i = 0; i < QUALIFIERS.length; i++) {
    Put put = new Put(ROWS[i]);
    put.add(FAMILY, QUALIFIERS[i], VALUE);
    ht.put(put);
  }
  Scan scan = new Scan();
  scan.addFamily(FAMILY);
  Filter filter = new KeyOnlyFilter(true);
  scan.setFilter(filter);
  ResultScanner scanner = ht.getScanner(scan);
  try {
    int count = 0;
    // Iterate the scanner opened above; opening a second scanner here would
    // leave it unclosed.
    for (Result result : scanner) {
      assertEquals(1, result.size());
      // lenAsVal=true: the value is replaced by a 4-byte length field...
      assertEquals(Bytes.SIZEOF_INT, result.raw()[0].getValueLength());
      // ...whose content is the length of the value that was stored.
      assertEquals(VALUE.length, Bytes.toInt(result.raw()[0].getValue()));
      count++;
    }
    assertEquals(QUALIFIERS.length, count);
  } finally {
    // Release the scanner even when an assertion above fails.
    scanner.close();
  }
}
/** /**
* Test simple table and non-existent row cases. * Test simple table and non-existent row cases.
*/ */

View File

@ -1312,14 +1312,14 @@ public class TestFilter extends HBaseTestCase {
kvs.length, idx); kvs.length, idx);
} }
private void verifyScanFullNoValues(Scan s, KeyValue [] kvs) private void verifyScanFullNoValues(Scan s, KeyValue [] kvs, boolean useLen)
throws IOException { throws IOException {
InternalScanner scanner = this.region.getScanner(s); InternalScanner scanner = this.region.getScanner(s);
List<KeyValue> results = new ArrayList<KeyValue>(); List<KeyValue> results = new ArrayList<KeyValue>();
int row = 0; int row = 0;
int idx = 0; int idx = 0;
for (boolean done = true; done; row++) { for (boolean more = true; more; row++) {
done = scanner.next(results); more = scanner.next(results);
Arrays.sort(results.toArray(new KeyValue[results.size()]), Arrays.sort(results.toArray(new KeyValue[results.size()]),
KeyValue.COMPARATOR); KeyValue.COMPARATOR);
if(results.isEmpty()) break; if(results.isEmpty()) break;
@ -1336,9 +1336,20 @@ public class TestFilter extends HBaseTestCase {
Bytes.equals(kv.getFamily(), kvs[idx].getFamily())); Bytes.equals(kv.getFamily(), kvs[idx].getFamily()));
assertTrue("Qualifier mismatch", assertTrue("Qualifier mismatch",
Bytes.equals(kv.getQualifier(), kvs[idx].getQualifier())); Bytes.equals(kv.getQualifier(), kvs[idx].getQualifier()));
assertFalse("Value match (expecting no value in result)", assertFalse("Should not have returned whole value",
Bytes.equals(kv.getValue(), kvs[idx].getValue())); Bytes.equals(kv.getValue(), kvs[idx].getValue()));
assertTrue("Value in result is not empty", kv.getValue().length == 0); if (useLen) {
assertEquals("Value in result is not SIZEOF_INT",
kv.getValue().length, Bytes.SIZEOF_INT);
LOG.info("idx = " + idx + ", len=" + kvs[idx].getValueLength()
+ ", actual=" + Bytes.toInt(kv.getValue()));
assertEquals("Scan value should be the length of the actual value. ",
kvs[idx].getValueLength(), Bytes.toInt(kv.getValue()) );
LOG.info("good");
} else {
assertEquals("Value in result is not empty",
kv.getValue().length, 0);
}
idx++; idx++;
} }
results.clear(); results.clear();
@ -1492,9 +1503,11 @@ public class TestFilter extends HBaseTestCase {
// Grab all 6 rows // Grab all 6 rows
long expectedRows = 6; long expectedRows = 6;
long expectedKeys = this.colsPerRow; long expectedKeys = this.colsPerRow;
Scan s = new Scan(); for (boolean useLen : new boolean[]{false,true}) {
s.setFilter(new KeyOnlyFilter()); Scan s = new Scan();
verifyScan(s, expectedRows, expectedKeys); s.setFilter(new KeyOnlyFilter(useLen));
verifyScanFullNoValues(s, expectedKVs); verifyScan(s, expectedRows, expectedKeys);
verifyScanFullNoValues(s, expectedKVs, useLen);
}
} }
} }