HBASE-2824 A filter that randomly includes rows based on a configured chance
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1067232 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5bb4725494
commit
1aa394d882
|
@ -63,7 +63,8 @@ Release 0.91.0 - Unreleased
|
|||
HBASE-3393 Update Avro gateway to use Avro 1.4.1 and the new
|
||||
server.join() method (Jeff Hammerbacher via Stack)
|
||||
HBASE-3437 Support Explicit Split Points from the Shell
|
||||
HBASE-3433 KeyValue API to explicitly distinguish between deep & shallow copies
|
||||
HBASE-3433 KeyValue API to explicitly distinguish between deep & shallow
|
||||
copies
|
||||
HBASE-3305 Allow round-robin distribution for table created with
|
||||
multiple regions (ted yu via jgray)
|
||||
|
||||
|
@ -74,6 +75,9 @@ Release 0.91.0 - Unreleased
|
|||
HBASE-3335 Add BitComparator for filtering (Nathaniel Cook via Stack)
|
||||
HBASE-3256 Coprocessors: Coprocessor host and observer for HMaster
|
||||
HBASE-3448 RegionSplitter, utility class to manually split tables
|
||||
HBASE-2824 A filter that randomly includes rows based on a configured
|
||||
chance (Ferdy via Andrew Purtell)
|
||||
|
||||
|
||||
Release 0.90.1 - Unreleased
|
||||
|
||||
|
|
|
@ -0,0 +1,118 @@
|
|||
/**
|
||||
* Copyright 2011 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
|
||||
/**
|
||||
* A filter that includes rows based on a chance.
|
||||
*
|
||||
*/
|
||||
public class RandomRowFilter extends FilterBase {
|
||||
protected static final Random random = new Random();
|
||||
|
||||
protected float chance;
|
||||
protected boolean filterOutRow;
|
||||
|
||||
/**
|
||||
* Writable constructor, do not use.
|
||||
*/
|
||||
public RandomRowFilter() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new filter with a specified chance for a row to be included.
|
||||
*
|
||||
* @param chance
|
||||
*/
|
||||
public RandomRowFilter(float chance) {
|
||||
this.chance = chance;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The chance that a row gets included.
|
||||
*/
|
||||
public float getChance() {
|
||||
return chance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the chance that a row is included.
|
||||
*
|
||||
* @param chance
|
||||
*/
|
||||
public void setChance(float chance) {
|
||||
this.chance = chance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean filterAllRemaining() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReturnCode filterKeyValue(KeyValue v) {
|
||||
if (filterOutRow) {
|
||||
return ReturnCode.NEXT_ROW;
|
||||
}
|
||||
return ReturnCode.INCLUDE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean filterRow() {
|
||||
return filterOutRow;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean filterRowKey(byte[] buffer, int offset, int length) {
|
||||
if (chance < 0) {
|
||||
// with a zero chance, the rows is always excluded
|
||||
filterOutRow = true;
|
||||
} else if (chance > 1) {
|
||||
// always included
|
||||
filterOutRow = false;
|
||||
} else {
|
||||
// roll the dice
|
||||
filterOutRow = !(random.nextFloat() < chance);
|
||||
}
|
||||
return filterOutRow;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
filterOutRow = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
chance = in.readFloat();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeFloat(chance);
|
||||
}
|
||||
}
|
|
@ -22,7 +22,6 @@ import java.io.ByteArrayInputStream;
|
|||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectInputStream;
|
||||
import java.io.ObjectOutputStream;
|
||||
|
@ -73,6 +72,7 @@ import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
|
|||
import org.apache.hadoop.hbase.filter.PageFilter;
|
||||
import org.apache.hadoop.hbase.filter.PrefixFilter;
|
||||
import org.apache.hadoop.hbase.filter.QualifierFilter;
|
||||
import org.apache.hadoop.hbase.filter.RandomRowFilter;
|
||||
import org.apache.hadoop.hbase.filter.RowFilter;
|
||||
import org.apache.hadoop.hbase.filter.SingleColumnValueExcludeFilter;
|
||||
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
|
||||
|
@ -218,6 +218,7 @@ public class HbaseObjectWritable implements Writable, WritableWithSize, Configur
|
|||
// serializable
|
||||
addToMap(Serializable.class, code++);
|
||||
|
||||
addToMap(RandomRowFilter.class, code++);
|
||||
}
|
||||
|
||||
private Class<?> declaredClass;
|
||||
|
|
|
@ -0,0 +1,91 @@
|
|||
/**
|
||||
* Copyright 2011 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
public class TestRandomRowFilter extends TestCase {
|
||||
protected RandomRowFilter halfChanceFilter;
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
halfChanceFilter = new RandomRowFilter(0.25f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests basics
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
public void testBasics() throws Exception {
|
||||
int included = 0;
|
||||
int max = 1000000;
|
||||
for (int i = 0; i < max; i++) {
|
||||
if (!halfChanceFilter.filterRowKey(Bytes.toBytes("row"), 0, Bytes
|
||||
.toBytes("row").length)) {
|
||||
included++;
|
||||
}
|
||||
}
|
||||
// Now let's check if the filter included the right number of rows;
|
||||
// since we're dealing with randomness, we must have a include an epsilon
|
||||
// tolerance.
|
||||
int epsilon = max / 100;
|
||||
assertTrue("Roughly 25% should pass the filter", Math.abs(included - max
|
||||
/ 4) < epsilon);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests serialization
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
public void testSerialization() throws Exception {
|
||||
RandomRowFilter newFilter = serializationTest(halfChanceFilter);
|
||||
// use epsilon float comparison
|
||||
assertTrue("float should be equal", Math.abs(newFilter.getChance()
|
||||
- halfChanceFilter.getChance()) < 0.000001f);
|
||||
}
|
||||
|
||||
private RandomRowFilter serializationTest(RandomRowFilter filter)
|
||||
throws Exception {
|
||||
// Decompose filter to bytes.
|
||||
ByteArrayOutputStream stream = new ByteArrayOutputStream();
|
||||
DataOutputStream out = new DataOutputStream(stream);
|
||||
filter.write(out);
|
||||
out.close();
|
||||
byte[] buffer = stream.toByteArray();
|
||||
|
||||
// Recompose filter.
|
||||
DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer));
|
||||
RandomRowFilter newFilter = new RandomRowFilter();
|
||||
newFilter.readFields(in);
|
||||
|
||||
return newFilter;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue