HBASE-2824 A filter that randomly includes rows based on a configured chance
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1067232 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5bb4725494
commit
1aa394d882
|
@ -63,7 +63,8 @@ Release 0.91.0 - Unreleased
|
||||||
HBASE-3393 Update Avro gateway to use Avro 1.4.1 and the new
|
HBASE-3393 Update Avro gateway to use Avro 1.4.1 and the new
|
||||||
server.join() method (Jeff Hammerbacher via Stack)
|
server.join() method (Jeff Hammerbacher via Stack)
|
||||||
HBASE-3437 Support Explicit Split Points from the Shell
|
HBASE-3437 Support Explicit Split Points from the Shell
|
||||||
HBASE-3433 KeyValue API to explicitly distinguish between deep & shallow copies
|
HBASE-3433 KeyValue API to explicitly distinguish between deep & shallow
|
||||||
|
copies
|
||||||
HBASE-3305 Allow round-robin distribution for table created with
|
HBASE-3305 Allow round-robin distribution for table created with
|
||||||
multiple regions (ted yu via jgray)
|
multiple regions (ted yu via jgray)
|
||||||
|
|
||||||
|
@ -74,6 +75,9 @@ Release 0.91.0 - Unreleased
|
||||||
HBASE-3335 Add BitComparator for filtering (Nathaniel Cook via Stack)
|
HBASE-3335 Add BitComparator for filtering (Nathaniel Cook via Stack)
|
||||||
HBASE-3256 Coprocessors: Coprocessor host and observer for HMaster
|
HBASE-3256 Coprocessors: Coprocessor host and observer for HMaster
|
||||||
HBASE-3448 RegionSplitter, utility class to manually split tables
|
HBASE-3448 RegionSplitter, utility class to manually split tables
|
||||||
|
HBASE-2824 A filter that randomly includes rows based on a configured
|
||||||
|
chance (Ferdy via Andrew Purtell)
|
||||||
|
|
||||||
|
|
||||||
Release 0.90.1 - Unreleased
|
Release 0.90.1 - Unreleased
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,118 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2011 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hbase.filter;
|
||||||
|
|
||||||
|
import java.io.DataInput;
|
||||||
|
import java.io.DataOutput;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.KeyValue;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A filter that includes rows based on a chance.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class RandomRowFilter extends FilterBase {
|
||||||
|
protected static final Random random = new Random();
|
||||||
|
|
||||||
|
protected float chance;
|
||||||
|
protected boolean filterOutRow;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writable constructor, do not use.
|
||||||
|
*/
|
||||||
|
public RandomRowFilter() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new filter with a specified chance for a row to be included.
|
||||||
|
*
|
||||||
|
* @param chance
|
||||||
|
*/
|
||||||
|
public RandomRowFilter(float chance) {
|
||||||
|
this.chance = chance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The chance that a row gets included.
|
||||||
|
*/
|
||||||
|
public float getChance() {
|
||||||
|
return chance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the chance that a row is included.
|
||||||
|
*
|
||||||
|
* @param chance
|
||||||
|
*/
|
||||||
|
public void setChance(float chance) {
|
||||||
|
this.chance = chance;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean filterAllRemaining() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ReturnCode filterKeyValue(KeyValue v) {
|
||||||
|
if (filterOutRow) {
|
||||||
|
return ReturnCode.NEXT_ROW;
|
||||||
|
}
|
||||||
|
return ReturnCode.INCLUDE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean filterRow() {
|
||||||
|
return filterOutRow;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean filterRowKey(byte[] buffer, int offset, int length) {
|
||||||
|
if (chance < 0) {
|
||||||
|
// with a zero chance, the rows is always excluded
|
||||||
|
filterOutRow = true;
|
||||||
|
} else if (chance > 1) {
|
||||||
|
// always included
|
||||||
|
filterOutRow = false;
|
||||||
|
} else {
|
||||||
|
// roll the dice
|
||||||
|
filterOutRow = !(random.nextFloat() < chance);
|
||||||
|
}
|
||||||
|
return filterOutRow;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() {
|
||||||
|
filterOutRow = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void readFields(DataInput in) throws IOException {
|
||||||
|
chance = in.readFloat();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(DataOutput out) throws IOException {
|
||||||
|
out.writeFloat(chance);
|
||||||
|
}
|
||||||
|
}
|
|
@ -22,7 +22,6 @@ import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.DataInput;
|
import java.io.DataInput;
|
||||||
import java.io.DataOutput;
|
import java.io.DataOutput;
|
||||||
import java.io.DataOutputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.ObjectInputStream;
|
import java.io.ObjectInputStream;
|
||||||
import java.io.ObjectOutputStream;
|
import java.io.ObjectOutputStream;
|
||||||
|
@ -73,6 +72,7 @@ import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
|
||||||
import org.apache.hadoop.hbase.filter.PageFilter;
|
import org.apache.hadoop.hbase.filter.PageFilter;
|
||||||
import org.apache.hadoop.hbase.filter.PrefixFilter;
|
import org.apache.hadoop.hbase.filter.PrefixFilter;
|
||||||
import org.apache.hadoop.hbase.filter.QualifierFilter;
|
import org.apache.hadoop.hbase.filter.QualifierFilter;
|
||||||
|
import org.apache.hadoop.hbase.filter.RandomRowFilter;
|
||||||
import org.apache.hadoop.hbase.filter.RowFilter;
|
import org.apache.hadoop.hbase.filter.RowFilter;
|
||||||
import org.apache.hadoop.hbase.filter.SingleColumnValueExcludeFilter;
|
import org.apache.hadoop.hbase.filter.SingleColumnValueExcludeFilter;
|
||||||
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
|
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
|
||||||
|
@ -218,6 +218,7 @@ public class HbaseObjectWritable implements Writable, WritableWithSize, Configur
|
||||||
// serializable
|
// serializable
|
||||||
addToMap(Serializable.class, code++);
|
addToMap(Serializable.class, code++);
|
||||||
|
|
||||||
|
addToMap(RandomRowFilter.class, code++);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Class<?> declaredClass;
|
private Class<?> declaredClass;
|
||||||
|
|
|
@ -0,0 +1,91 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2011 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hbase.filter;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.DataInputStream;
|
||||||
|
import java.io.DataOutputStream;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
|
||||||
|
public class TestRandomRowFilter extends TestCase {
|
||||||
|
protected RandomRowFilter halfChanceFilter;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
halfChanceFilter = new RandomRowFilter(0.25f);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests basics
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public void testBasics() throws Exception {
|
||||||
|
int included = 0;
|
||||||
|
int max = 1000000;
|
||||||
|
for (int i = 0; i < max; i++) {
|
||||||
|
if (!halfChanceFilter.filterRowKey(Bytes.toBytes("row"), 0, Bytes
|
||||||
|
.toBytes("row").length)) {
|
||||||
|
included++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Now let's check if the filter included the right number of rows;
|
||||||
|
// since we're dealing with randomness, we must have a include an epsilon
|
||||||
|
// tolerance.
|
||||||
|
int epsilon = max / 100;
|
||||||
|
assertTrue("Roughly 25% should pass the filter", Math.abs(included - max
|
||||||
|
/ 4) < epsilon);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests serialization
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public void testSerialization() throws Exception {
|
||||||
|
RandomRowFilter newFilter = serializationTest(halfChanceFilter);
|
||||||
|
// use epsilon float comparison
|
||||||
|
assertTrue("float should be equal", Math.abs(newFilter.getChance()
|
||||||
|
- halfChanceFilter.getChance()) < 0.000001f);
|
||||||
|
}
|
||||||
|
|
||||||
|
private RandomRowFilter serializationTest(RandomRowFilter filter)
|
||||||
|
throws Exception {
|
||||||
|
// Decompose filter to bytes.
|
||||||
|
ByteArrayOutputStream stream = new ByteArrayOutputStream();
|
||||||
|
DataOutputStream out = new DataOutputStream(stream);
|
||||||
|
filter.write(out);
|
||||||
|
out.close();
|
||||||
|
byte[] buffer = stream.toByteArray();
|
||||||
|
|
||||||
|
// Recompose filter.
|
||||||
|
DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer));
|
||||||
|
RandomRowFilter newFilter = new RandomRowFilter();
|
||||||
|
newFilter.readFields(in);
|
||||||
|
|
||||||
|
return newFilter;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue