HBASE-22833 MultiRowRangeFilter should provide a method for creating… (#493)

* HBASE-22833: MultiRowRangeFilter should provide a method for creating a filter which is functionally equivalent to multiple prefix filters
* Delete superfluous comments
* Add description for MultiRowRangeFilter constructor
* Add null check for rowKeyPrefixes
* Fix checkstyle

Signed-off-by: huzheng <openinx@gmail.com>
2019-08-16 10:59:01 +09:00 · 2019-08-16 10:59:01 +09:00 · 6e273e8fdc
parent 0481b04ca3
commit 6e273e8fdc
4 changed files with 131 additions and 47 deletions
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientUtil.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientUtil.java
@ -17,8 +17,10 @@
 */
 package org.apache.hadoop.hbase.client;
-import org.apache.yetus.audience.InterfaceAudience;
+import java.util.Arrays;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
 public class ClientUtil {
@ -31,4 +33,46 @@ public class ClientUtil {
  /**
   * Creates a {@link Cursor} for the given row.
   *
   * @param row the row key handed to the {@code Cursor} constructor
   * @return a new {@code Cursor} built from {@code row}
   */
  public static Cursor createCursor(byte[] row) {
    return new Cursor(row);
  }
  /**
   * <p>Computes the row key that immediately follows every row key starting with the given
   * prefix, i.e. the exclusive stop row to use when scanning for that prefix.</p>
   * <p><b>IMPORTANT: This converts a rowKey<u>Prefix</u> into a rowKey</b>.</p>
   * <p>The prefix is treated like an unsigned number that gets incremented by one. For an
   * 'ASCII' string stored in a byte[] that simply means incrementing the last byte, but trailing
   * 0xFF bytes cannot be incremented: they are dropped and the byte in front of them is
   * incremented instead. For example the binary prefix</p>
   * &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
   * yields the stop row<br/>
   * &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x24 }</b> (Notice that it is shorter now)<br/>
   * A prefix that is empty or made up of 0xFF bytes only has no incrementable byte, so
   * {@link HConstants#EMPTY_END_ROW} is returned to stop at the 'end of the table'.
   *
   * @param rowKeyPrefix the rowKey<u>Prefix</u>; the passed array is not modified.
   * @return the closest next rowKey immediately following the given rowKeyPrefix.
   */
  public static byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
    // Number of leading bytes to keep once the run of trailing 0xFF bytes is stripped.
    int keep = rowKeyPrefix.length;
    while (keep > 0 && rowKeyPrefix[keep - 1] == (byte) 0xFF) {
      keep--;
    }

    if (keep == 0) {
      // Nothing but 0xFF bytes (or no bytes at all): no larger key of this shape exists,
      // so the scan has to run to the 'end of the table'.
      return HConstants.EMPTY_END_ROW;
    }

    // Work on a truncated copy so the caller's array stays untouched, then do the "+1".
    byte[] nextRowKey = Arrays.copyOf(rowKeyPrefix, keep);
    nextRowKey[keep - 1]++;
    return nextRowKey;
  }
 }
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java
@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.client;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@ -563,53 +562,11 @@ public class Scan extends Query {
      setStopRow(HConstants.EMPTY_END_ROW);
    } else {
      this.setStartRow(rowPrefix);
-      this.setStopRow(calculateTheClosestNextRowKeyForPrefix(rowPrefix));
+      this.setStopRow(ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowPrefix));
    }
    return this;
  }
  /**
   * <p>A prefix scan has to stop right after the last row that carries the prefix. This method
   * derives that stop row: the closest rowKey that sorts immediately after all rows with the
   * given rowKeyPrefix.</p>
   * <p><b>IMPORTANT: This converts a rowKey<u>Prefix</u> into a rowKey</b>.</p>
   * <p>For a prefix that is an 'ASCII' string in a byte[] it is enough to increment the last
   * byte. With real binary rowids the prefix may end in 0xFF bytes, for instance:</p>
   * &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
   * In that case the stopRow handed to the scan has to be<br/>
   * &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x24 }</b> (Notice that it is shorter now)<br/>
   * When no byte can be incremented at all, {@link HConstants#EMPTY_END_ROW} is returned.
   *
   * @param rowKeyPrefix the rowKey<u>Prefix</u>.
   * @return the closest next rowKey immediately following the given rowKeyPrefix.
   */
  private byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
    // This is a manual +1 on an 'unsigned very very long'.
    // Locate the last byte that can still be incremented, i.e. the last one that is not 0xFF.
    int lastIncrementable = rowKeyPrefix.length - 1;
    while (lastIncrementable >= 0 && rowKeyPrefix[lastIncrementable] == (byte) 0xFF) {
      lastIncrementable--;
    }

    if (lastIncrementable < 0) {
      // The prefix is 0xFFFF... (only FFs), the last possible prefix before the end of the
      // table, so the scan must stop at the 'end of the table'.
      return HConstants.EMPTY_END_ROW;
    }

    // Keep everything up to and including that byte, then increment it in the copy.
    byte[] stopRow = Arrays.copyOfRange(rowKeyPrefix, 0, lastIncrementable + 1);
    stopRow[lastIncrementable]++;
    return stopRow;
  }
  /**
   * Get all available versions.
   * @return this
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java
@ -26,12 +26,13 @@ import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.PrivateCellUtil;
-import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.hadoop.hbase.client.ClientUtil;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
 import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
 import org.apache.hadoop.hbase.util.Bytes;
 /**
 * Filter to support scan multiple row key ranges. It can construct the row key ranges from the
@ -71,6 +72,33 @@ public class MultiRowRangeFilter extends FilterBase {
    this.ranges = new RangeIteration(rangeList);
  }
  /**
   * Constructor for creating a <code>MultiRowRangeFilter</code> from multiple rowkey prefixes.
   *
   * As <code>MultiRowRangeFilter</code> javadoc says (See the solution 1 of the first statement),
   * if you try to create a filter list that scans row keys corresponding to given prefixes (e.g.,
   * <code>FilterList</code> composed of multiple <code>PrefixFilter</code>s), this constructor
   * provides a way to avoid creating an inefficient one.
   *
   * Each prefix is turned into one row range that starts at the prefix itself (inclusive) and
   * ends at the closest next row key following the prefix (exclusive); the resulting list of
   * ranges is then handed to the list-based constructor.
   *
   * @param rowKeyPrefixes the row key prefixes to match, one <code>byte[]</code> per prefix
   * @throws IllegalArgumentException if <code>rowKeyPrefixes</code> is <code>null</code>
   */
  public MultiRowRangeFilter(byte[][] rowKeyPrefixes) {
    // All the work happens in the helper because this(...) must be the first statement.
    this(createRangeListFromRowKeyPrefixes(rowKeyPrefixes));
  }
  /**
   * Converts row key prefixes into the equivalent list of row ranges.
   *
   * Every prefix becomes one range that starts at the prefix (inclusive) and stops at the
   * closest next row key following the prefix (exclusive), as computed by
   * {@link ClientUtil#calculateTheClosestNextRowKeyForPrefix(byte[])}. The ranges are returned
   * in the order of the given prefixes.
   *
   * @param rowKeyPrefixes the row key prefixes, one <code>byte[]</code> per prefix
   * @return one <code>RowRange</code> per prefix
   * @throws IllegalArgumentException if <code>rowKeyPrefixes</code> or any of its elements is
   *         <code>null</code>
   */
  private static List<RowRange> createRangeListFromRowKeyPrefixes(byte[][] rowKeyPrefixes) {
    if (rowKeyPrefixes == null) {
      throw new IllegalArgumentException("Invalid rowkey prefixes");
    }

    // The final size is known up front: exactly one range per prefix.
    List<RowRange> list = new ArrayList<>(rowKeyPrefixes.length);
    for (int i = 0; i < rowKeyPrefixes.length; i++) {
      byte[] rowKeyPrefix = rowKeyPrefixes[i];
      if (rowKeyPrefix == null) {
        // Reject null elements here with a clear message; otherwise the stop row calculation
        // below would fail with a bare NullPointerException.
        throw new IllegalArgumentException("Invalid rowkey prefixes: prefix at index " + i
          + " is null");
      }
      byte[] stopRow = ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowKeyPrefix);
      list.add(new RowRange(rowKeyPrefix, true, stopRow, false));
    }
    return list;
  }
  public List<RowRange> getRowRanges() {
    // Used by hbase-rest
    return this.rangeList;
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java
@ -82,6 +82,61 @@ public class TestMultiRowRangeFilter {
    TEST_UTIL.shutdownMiniCluster();
  }
  /**
   * A single empty prefix is expected to produce one range running from
   * {@code EMPTY_START_ROW} (inclusive) to {@code EMPTY_END_ROW} (exclusive).
   */
  @Test
  public void testRowKeyPrefixWithEmptyPrefix() throws IOException {
    byte[][] prefixes = { new byte[0] };

    List<RowRange> expected = new ArrayList<>();
    expected.add(new RowRange(HConstants.EMPTY_START_ROW, true, HConstants.EMPTY_END_ROW, false));

    List<RowRange> actual = new MultiRowRangeFilter(prefixes).getRowRanges();
    assertRangesEqual(expected, actual);
  }
  /**
   * When the last byte of the prefix is not 0xFF, the expected stop row is the prefix with
   * just that last byte incremented.
   */
  @Test
  public void testRowKeyPrefixWithLastIncrementablePrefix() throws IOException {
    final byte[] startRow = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFE};
    final byte[] stopRow = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
    byte[][] prefixes = { startRow };

    List<RowRange> expected = new ArrayList<>();
    expected.add(new RowRange(startRow, true, stopRow, false));

    List<RowRange> actual = new MultiRowRangeFilter(prefixes).getRowRanges();
    assertRangesEqual(expected, actual);
  }
  /**
   * When the prefix ends in 0xFF bytes, the expected stop row drops those bytes and
   * increments the byte in front of them, so it is shorter than the prefix.
   */
  @Test
  public void testRowKeyPrefixWithoutLastIncrementablePrefix() throws IOException {
    final byte[] startRow = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
    final byte[] stopRow = {(byte) 0x12, (byte) 0x24};
    byte[][] prefixes = { startRow };

    List<RowRange> expected = new ArrayList<>();
    expected.add(new RowRange(startRow, true, stopRow, false));

    List<RowRange> actual = new MultiRowRangeFilter(prefixes).getRowRanges();
    assertRangesEqual(expected, actual);
  }
  /**
   * Two prefixes whose ranges touch (the stop row of the first equals the start row of the
   * second) are expected to show up as one range from the first prefix to the second
   * prefix's stop row.
   */
  @Test
  public void testRowKeyPrefixWithMergablePrefix() throws IOException {
    final byte[] lowerPrefix = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFE};
    final byte[] upperPrefix = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
    final byte[] mergedStop = {(byte) 0x12, (byte) 0x24};
    byte[][] prefixes = { lowerPrefix, upperPrefix };

    List<RowRange> expected = new ArrayList<>();
    expected.add(new RowRange(lowerPrefix, true, mergedStop, false));

    List<RowRange> actual = new MultiRowRangeFilter(prefixes).getRowRanges();
    assertRangesEqual(expected, actual);
  }
  @Test
  public void testRanges() throws IOException {
    byte[] key1Start = new byte[] {-3};