HBASE-22833 MultiRowRangeFilter should provide a method for creating a filter which is functionally equivalent to multiple prefix filters (#493)
* HBASE-22833: MultiRowRangeFilter should provide a method for creating a filter which is functionally equivalent to multiple prefix filters
* Delete superfluous comments
* Add description for MultiRowRangeFilter constructor
* Add null check for rowKeyPrefixes
* Fix checkstyle

Signed-off-by: huzheng <openinx@gmail.com>
This commit is contained in:
parent
0481b04ca3
commit
6e273e8fdc
|
@ -17,8 +17,10 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.client;
|
package org.apache.hadoop.hbase.client;
|
||||||
|
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import java.util.Arrays;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public class ClientUtil {
|
public class ClientUtil {
|
||||||
|
@ -31,4 +33,46 @@ public class ClientUtil {
|
||||||
public static Cursor createCursor(byte[] row) {
|
public static Cursor createCursor(byte[] row) {
|
||||||
return new Cursor(row);
|
return new Cursor(row);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>When scanning for a prefix the scan should stop immediately after the last row that
|
||||||
|
* has the specified prefix. This method calculates the closest next rowKey immediately following
|
||||||
|
* the given rowKeyPrefix.</p>
|
||||||
|
* <p><b>IMPORTANT: This converts a rowKey<u>Prefix</u> into a rowKey</b>.</p>
|
||||||
|
* <p>If the prefix is an 'ASCII' string put into a byte[] then this is easy because you can
|
||||||
|
* simply increment the last byte of the array.
|
||||||
|
* But if your application uses real binary rowids you may run into the scenario that your
|
||||||
|
* prefix is something like:</p>
|
||||||
|
* <b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
|
||||||
|
* Then this stopRow needs to be fed into the actual scan<br/>
|
||||||
|
* <b>{ 0x12, 0x24 }</b> (Notice that it is shorter now)<br/>
|
||||||
|
* This method calculates the correct stop row value for this usecase.
|
||||||
|
*
|
||||||
|
* @param rowKeyPrefix the rowKey<u>Prefix</u>.
|
||||||
|
* @return the closest next rowKey immediately following the given rowKeyPrefix.
|
||||||
|
*/
|
||||||
|
public static byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
|
||||||
|
// Essentially we are treating it like an 'unsigned very very long' and doing +1 manually.
|
||||||
|
// Search for the place where the trailing 0xFFs start
|
||||||
|
int offset = rowKeyPrefix.length;
|
||||||
|
while (offset > 0) {
|
||||||
|
if (rowKeyPrefix[offset - 1] != (byte) 0xFF) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
offset--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (offset == 0) {
|
||||||
|
// We got an 0xFFFF... (only FFs) stopRow value which is
|
||||||
|
// the last possible prefix before the end of the table.
|
||||||
|
// So set it to stop at the 'end of the table'
|
||||||
|
return HConstants.EMPTY_END_ROW;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the right length of the original
|
||||||
|
byte[] newStopRow = Arrays.copyOfRange(rowKeyPrefix, 0, offset);
|
||||||
|
// And increment the last one
|
||||||
|
newStopRow[newStopRow.length - 1]++;
|
||||||
|
return newStopRow;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.client;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -563,53 +562,11 @@ public class Scan extends Query {
|
||||||
setStopRow(HConstants.EMPTY_END_ROW);
|
setStopRow(HConstants.EMPTY_END_ROW);
|
||||||
} else {
|
} else {
|
||||||
this.setStartRow(rowPrefix);
|
this.setStartRow(rowPrefix);
|
||||||
this.setStopRow(calculateTheClosestNextRowKeyForPrefix(rowPrefix));
|
this.setStopRow(ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowPrefix));
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* <p>When scanning for a prefix the scan should stop immediately after the last row that
|
|
||||||
* has the specified prefix. This method calculates the closest next rowKey immediately following
|
|
||||||
* the given rowKeyPrefix.</p>
|
|
||||||
* <p><b>IMPORTANT: This converts a rowKey<u>Prefix</u> into a rowKey</b>.</p>
|
|
||||||
* <p>If the prefix is an 'ASCII' string put into a byte[] then this is easy because you can
|
|
||||||
* simply increment the last byte of the array.
|
|
||||||
* But if your application uses real binary rowids you may run into the scenario that your
|
|
||||||
* prefix is something like:</p>
|
|
||||||
* <b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
|
|
||||||
* Then this stopRow needs to be fed into the actual scan<br/>
|
|
||||||
* <b>{ 0x12, 0x24 }</b> (Notice that it is shorter now)<br/>
|
|
||||||
* This method calculates the correct stop row value for this usecase.
|
|
||||||
*
|
|
||||||
* @param rowKeyPrefix the rowKey<u>Prefix</u>.
|
|
||||||
* @return the closest next rowKey immediately following the given rowKeyPrefix.
|
|
||||||
*/
|
|
||||||
private byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
|
|
||||||
// Essentially we are treating it like an 'unsigned very very long' and doing +1 manually.
|
|
||||||
// Search for the place where the trailing 0xFFs start
|
|
||||||
int offset = rowKeyPrefix.length;
|
|
||||||
while (offset > 0) {
|
|
||||||
if (rowKeyPrefix[offset - 1] != (byte) 0xFF) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
offset--;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (offset == 0) {
|
|
||||||
// We got an 0xFFFF... (only FFs) stopRow value which is
|
|
||||||
// the last possible prefix before the end of the table.
|
|
||||||
// So set it to stop at the 'end of the table'
|
|
||||||
return HConstants.EMPTY_END_ROW;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy the right length of the original
|
|
||||||
byte[] newStopRow = Arrays.copyOfRange(rowKeyPrefix, 0, offset);
|
|
||||||
// And increment the last one
|
|
||||||
newStopRow[newStopRow.length - 1]++;
|
|
||||||
return newStopRow;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get all available versions.
|
* Get all available versions.
|
||||||
* @return this
|
* @return this
|
||||||
|
|
|
@ -26,12 +26,13 @@ import org.apache.hadoop.hbase.Cell;
|
||||||
import org.apache.hadoop.hbase.CellUtil;
|
import org.apache.hadoop.hbase.CellUtil;
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.PrivateCellUtil;
|
import org.apache.hadoop.hbase.PrivateCellUtil;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.hadoop.hbase.client.ClientUtil;
|
||||||
import org.apache.hadoop.hbase.exceptions.DeserializationException;
|
import org.apache.hadoop.hbase.exceptions.DeserializationException;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
|
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
|
||||||
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
|
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filter to support scan multiple row key ranges. It can construct the row key ranges from the
|
* Filter to support scan multiple row key ranges. It can construct the row key ranges from the
|
||||||
|
@ -71,6 +72,33 @@ public class MultiRowRangeFilter extends FilterBase {
|
||||||
this.ranges = new RangeIteration(rangeList);
|
this.ranges = new RangeIteration(rangeList);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor for creating a <code>MultiRowRangeFilter</code> from multiple rowkey prefixes.
|
||||||
|
*
|
||||||
|
* As the <code>MultiRowRangeFilter</code> class javadoc says (see solution 1 of the first statement),
|
||||||
|
* if you try to create a filter list that scans row keys corresponding to given prefixes (e.g.,
|
||||||
|
* <code>FilterList</code> composed of multiple <code>PrefixFilter</code>s), this constructor
|
||||||
|
* provides a way to avoid creating an inefficient one.
|
||||||
|
*
|
||||||
|
* @param rowKeyPrefixes the row key prefixes to match, one byte array per prefix; must not be null
|
||||||
|
*/
|
||||||
|
public MultiRowRangeFilter(byte[][] rowKeyPrefixes) {
|
||||||
|
this(createRangeListFromRowKeyPrefixes(rowKeyPrefixes));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<RowRange> createRangeListFromRowKeyPrefixes(byte[][] rowKeyPrefixes) {
|
||||||
|
if (rowKeyPrefixes == null) {
|
||||||
|
throw new IllegalArgumentException("Invalid rowkey prefixes");
|
||||||
|
}
|
||||||
|
|
||||||
|
List<RowRange> list = new ArrayList<>();
|
||||||
|
for (byte[] rowKeyPrefix: rowKeyPrefixes) {
|
||||||
|
byte[] stopRow = ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowKeyPrefix);
|
||||||
|
list.add(new RowRange(rowKeyPrefix, true, stopRow, false));
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
public List<RowRange> getRowRanges() {
|
public List<RowRange> getRowRanges() {
|
||||||
// Used by hbase-rest
|
// Used by hbase-rest
|
||||||
return this.rangeList;
|
return this.rangeList;
|
||||||
|
|
|
@ -82,6 +82,61 @@ public class TestMultiRowRangeFilter {
|
||||||
TEST_UTIL.shutdownMiniCluster();
|
TEST_UTIL.shutdownMiniCluster();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRowKeyPrefixWithEmptyPrefix() throws IOException {
|
||||||
|
byte[] prefix = {};
|
||||||
|
byte[][] rowKeyPrefixes = new byte[1][];
|
||||||
|
rowKeyPrefixes[0] = prefix;
|
||||||
|
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
|
||||||
|
List<RowRange> actualRanges = filter.getRowRanges();
|
||||||
|
List<RowRange> expectedRanges = new ArrayList<>();
|
||||||
|
expectedRanges.add(
|
||||||
|
new RowRange(HConstants.EMPTY_START_ROW, true, HConstants.EMPTY_END_ROW, false)
|
||||||
|
);
|
||||||
|
assertRangesEqual(expectedRanges, actualRanges);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRowKeyPrefixWithLastIncrementablePrefix() throws IOException {
|
||||||
|
byte[] prefix = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFE};
|
||||||
|
byte[][] rowKeyPrefixes = new byte[1][];
|
||||||
|
rowKeyPrefixes[0] = prefix;
|
||||||
|
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
|
||||||
|
List<RowRange> actualRanges = filter.getRowRanges();
|
||||||
|
List<RowRange> expectedRanges = new ArrayList<>();
|
||||||
|
final byte[] expectedStop = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
|
||||||
|
expectedRanges.add(new RowRange(prefix, true, expectedStop , false));
|
||||||
|
assertRangesEqual(expectedRanges, actualRanges);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRowKeyPrefixWithoutLastIncrementablePrefix() throws IOException {
|
||||||
|
byte[] prefix = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
|
||||||
|
byte[][] rowKeyPrefixes = new byte[1][];
|
||||||
|
rowKeyPrefixes[0] = prefix;
|
||||||
|
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
|
||||||
|
List<RowRange> actualRanges = filter.getRowRanges();
|
||||||
|
List<RowRange> expectedRanges = new ArrayList<>();
|
||||||
|
final byte[] expectedStop = {(byte) 0x12, (byte) 0x24};
|
||||||
|
expectedRanges.add(new RowRange(prefix, true, expectedStop , false));
|
||||||
|
assertRangesEqual(expectedRanges, actualRanges);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRowKeyPrefixWithMergablePrefix() throws IOException {
|
||||||
|
byte[] prefix1 = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFE};
|
||||||
|
byte[] prefix2 = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
|
||||||
|
byte[][] rowKeyPrefixes = new byte[2][];
|
||||||
|
rowKeyPrefixes[0] = prefix1;
|
||||||
|
rowKeyPrefixes[1] = prefix2;
|
||||||
|
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
|
||||||
|
List<RowRange> actualRanges = filter.getRowRanges();
|
||||||
|
List<RowRange> expectedRanges = new ArrayList<>();
|
||||||
|
final byte[] expectedStop = {(byte) 0x12, (byte) 0x24};
|
||||||
|
expectedRanges.add(new RowRange(prefix1, true, expectedStop , false));
|
||||||
|
assertRangesEqual(expectedRanges, actualRanges);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRanges() throws IOException {
|
public void testRanges() throws IOException {
|
||||||
byte[] key1Start = new byte[] {-3};
|
byte[] key1Start = new byte[] {-3};
|
||||||
|
|
Loading…
Reference in New Issue