HBASE-3684 Support column range filter
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1087593 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
22e5b51c7b
commit
d8d4fc8b7c
|
@ -119,6 +119,7 @@ Release 0.91.0 - Unreleased
|
|||
HBASE-3705 Allow passing timestamp into importtsv (Andy Sautins via Stack)
|
||||
HBASE-3715 Book.xml - adding architecture section on client, adding section
|
||||
on spec-ex under mapreduce (Doug Meil via Stack)
|
||||
HBASE-3684 Support column range filter (Jerry Chen via Stack)
|
||||
|
||||
TASK
|
||||
HBASE-3559 Move report of split to master OFF the heartbeat channel
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* Copyright 2010 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.io.DataInput;
|
||||
|
||||
/**
 * This filter is used for selecting only those keys with columns that are
 * between minColumn to maxColumn. For example, if minColumn is 'an', and
 * maxColumn is 'be', it will pass keys with columns like 'ana', 'bad', but not
 * keys with columns like 'bed', 'eye'
 *
 * If minColumn is null, there is no lower bound. If maxColumn is null, there is
 * no upper bound.
 *
 * minColumnInclusive and maxColumnInclusive specify if the ranges are inclusive
 * or not.
 */
public class ColumnRangeFilter extends FilterBase {
  // Lower bound on the column qualifier; null means no lower bound.
  protected byte[] minColumn = null;
  protected boolean minColumnInclusive = true;
  // Upper bound on the column qualifier; null means no upper bound.
  protected byte[] maxColumn = null;
  protected boolean maxColumnInclusive = false;

  /** Writable constructor: fields are populated later via readFields(). */
  public ColumnRangeFilter() {
    super();
  }

  /**
   * Create a filter to select those keys with columns that are between minColumn
   * and maxColumn.
   * @param minColumn minimum value for the column range. If it's null,
   * there is no lower bound.
   * @param minColumnInclusive if true, include minColumn in the range.
   * @param maxColumn maximum value for the column range. If it's null,
   * there is no upper bound.
   * @param maxColumnInclusive if true, include maxColumn in the range.
   */
  public ColumnRangeFilter(final byte[] minColumn, boolean minColumnInclusive,
      final byte[] maxColumn, boolean maxColumnInclusive) {
    this.minColumn = minColumn;
    this.minColumnInclusive = minColumnInclusive;
    this.maxColumn = maxColumn;
    this.maxColumnInclusive = maxColumnInclusive;
  }

  /**
   * @return if min column range is inclusive.
   */
  public boolean isMinColumnInclusive() {
    return minColumnInclusive;
  }

  /**
   * @return if max column range is inclusive.
   */
  public boolean isMaxColumnInclusive() {
    return maxColumnInclusive;
  }

  /**
   * @return the min column range for the filter
   */
  public byte[] getMinColumn() {
    return this.minColumn;
  }

  /**
   * @return the max column range for the filter
   */
  public byte[] getMaxColumn() {
    return this.maxColumn;
  }

  /**
   * Compares the KeyValue's qualifier bytes against [minColumn, maxColumn].
   * Qualifiers below the range produce a seek hint to minColumn (see
   * getNextKeyHint); qualifiers above the range end the row.
   */
  @Override
  public ReturnCode filterKeyValue(KeyValue kv) {
    byte[] buffer = kv.getBuffer();
    int qualifierOffset = kv.getQualifierOffset();
    int qualifierLength = kv.getQualifierLength();
    // Default of 1 means "past the (absent) lower bound" when minColumn is null.
    int cmpMin = 1;

    if (this.minColumn != null) {
      cmpMin = Bytes.compareTo(buffer, qualifierOffset, qualifierLength,
          this.minColumn, 0, this.minColumn.length);
    }

    if (cmpMin < 0) {
      // Qualifier sorts before minColumn: seek directly to minColumn instead
      // of skipping one KeyValue at a time.
      return ReturnCode.SEEK_NEXT_USING_HINT;
    }

    if (!this.minColumnInclusive && cmpMin == 0) {
      // Exactly at an exclusive lower bound: this one is out, but a later
      // qualifier in the same row may still match.
      return ReturnCode.SKIP;
    }

    if (this.maxColumn == null) {
      return ReturnCode.INCLUDE;
    }

    int cmpMax = Bytes.compareTo(buffer, qualifierOffset, qualifierLength,
        this.maxColumn, 0, this.maxColumn.length);

    if (this.maxColumnInclusive && cmpMax <= 0 ||
        !this.maxColumnInclusive && cmpMax < 0) {
      return ReturnCode.INCLUDE;
    }

    // Qualifier sorts after maxColumn; since qualifiers are sorted within a
    // row, nothing later in this row can match either.
    return ReturnCode.NEXT_ROW;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    // need to write out a flag for null value separately. Otherwise,
    // we will not be able to differentiate empty string and null
    out.writeBoolean(this.minColumn == null);
    Bytes.writeByteArray(out, this.minColumn);
    out.writeBoolean(this.minColumnInclusive);

    out.writeBoolean(this.maxColumn == null);
    Bytes.writeByteArray(out, this.maxColumn);
    out.writeBoolean(this.maxColumnInclusive);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    // Mirror of write(): a boolean null-flag precedes each byte array so an
    // empty array can be told apart from null.
    boolean isMinColumnNull = in.readBoolean();
    this.minColumn = Bytes.readByteArray(in);

    if (isMinColumnNull) {
      this.minColumn = null;
    }

    this.minColumnInclusive = in.readBoolean();

    boolean isMaxColumnNull = in.readBoolean();
    this.maxColumn = Bytes.readByteArray(in);
    if (isMaxColumnNull) {
      this.maxColumn = null;
    }
    this.maxColumnInclusive = in.readBoolean();
  }

  /**
   * Hint paired with ReturnCode.SEEK_NEXT_USING_HINT: the first possible
   * KeyValue for this row/family at qualifier minColumn.
   */
  @Override
  public KeyValue getNextKeyHint(KeyValue kv) {
    return KeyValue.createFirstOnRow(kv.getBuffer(), kv.getRowOffset(), kv
        .getRowLength(), kv.getBuffer(), kv.getFamilyOffset(), kv
        .getFamilyLength(), this.minColumn, 0, this.minColumn == null ? 0
        : this.minColumn.length);
  }
}
|
|
@ -64,6 +64,7 @@ import org.apache.hadoop.hbase.filter.BinaryComparator;
|
|||
import org.apache.hadoop.hbase.filter.BitComparator;
|
||||
import org.apache.hadoop.hbase.filter.ColumnCountGetFilter;
|
||||
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
|
||||
import org.apache.hadoop.hbase.filter.ColumnRangeFilter;
|
||||
import org.apache.hadoop.hbase.filter.CompareFilter;
|
||||
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
|
||||
import org.apache.hadoop.hbase.filter.DependentColumnFilter;
|
||||
|
@ -222,6 +223,8 @@ public class HbaseObjectWritable implements Writable, WritableWithSize, Configur
|
|||
addToMap(RandomRowFilter.class, code++);
|
||||
|
||||
addToMap(CompareOp.class, code++);
|
||||
|
||||
addToMap(ColumnRangeFilter.class, code++);
|
||||
}
|
||||
|
||||
private Class<?> declaredClass;
|
||||
|
|
|
@ -0,0 +1,242 @@
|
|||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HColumnDescriptor;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.KeyValueTestUtil;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.InternalScanner;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.junit.Test;
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
 * A string interval with independently inclusive/exclusive bounds, used by the
 * test to model the expected qualifier range of a ColumnRangeFilter. A null
 * start means no lower bound; a null end means no upper bound.
 */
class StringRange {
  private String start = null;
  private String end = null;
  private boolean startInclusive = true;
  private boolean endInclusive = false;

  public StringRange(String start, boolean startInclusive, String end,
      boolean endInclusive) {
    this.start = start;
    this.startInclusive = startInclusive;
    this.end = end;
    this.endInclusive = endInclusive;
  }

  public String getStart() {
    return this.start;
  }

  public String getEnd() {
    return this.end;
  }

  public boolean isStartInclusive() {
    return this.startInclusive;
  }

  public boolean isEndInclusive() {
    return this.endInclusive;
  }

  @Override
  public int hashCode() {
    // Only the bounds participate; objects equal under equals() share the
    // same bounds, so the hashCode/equals contract holds.
    int hashCode = 0;
    if (this.start != null) {
      hashCode ^= this.start.hashCode();
    }

    if (this.end != null) {
      hashCode ^= this.end.hashCode();
    }
    return hashCode;
  }

  /**
   * Fix: hashCode() was overridden without equals(), breaking value equality
   * for instances used as HashMap keys. Two ranges are equal when both bounds
   * and both inclusiveness flags match.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof StringRange)) {
      return false;
    }
    StringRange other = (StringRange) obj;
    return this.startInclusive == other.startInclusive
        && this.endInclusive == other.endInclusive
        && (this.start == null ? other.start == null
            : this.start.equals(other.start))
        && (this.end == null ? other.end == null
            : this.end.equals(other.end));
  }

  @Override
  public String toString() {
    // Interval notation, e.g. "[a, b)"; a null bound prints as "null".
    String result = (this.startInclusive ? "[" : "(")
        + (this.start == null ? null : this.start) + ", "
        + (this.end == null ? null : this.end)
        + (this.endInclusive ? "]" : ")");
    return result;
  }

  /**
   * @return true if value lies within this range, honoring the
   * inclusive/exclusive flags; a null bound imposes no constraint.
   */
  public boolean inRange(String value) {
    boolean afterStart = true;
    if (this.start != null) {
      int startCmp = value.compareTo(this.start);
      afterStart = this.startInclusive ? startCmp >= 0 : startCmp > 0;
    }

    boolean beforeEnd = true;
    if (this.end != null) {
      int endCmp = value.compareTo(this.end);
      beforeEnd = this.endInclusive ? endCmp <= 0 : endCmp < 0;
    }

    return afterStart && beforeEnd;
  }
}
|
||||
|
||||
public class TestColumnRangeFilter {
|
||||
|
||||
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||
|
||||
private final Log LOG = LogFactory.getLog(this.getClass());
|
||||
|
||||
/**
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
TEST_UTIL.startMiniCluster(3);
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@AfterClass
|
||||
public static void tearDownAfterClass() throws Exception {
|
||||
TEST_UTIL.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws java.lang.Exception
|
||||
*/
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestColumnRangeFilterClient() throws Exception {
|
||||
String family = "Family";
|
||||
String table = "TestColumnRangeFilterClient";
|
||||
HTable ht = TEST_UTIL.createTable(Bytes.toBytes(table),
|
||||
Bytes.toBytes(family), Integer.MAX_VALUE);
|
||||
|
||||
List<String> rows = generateRandomWords(10, 8);
|
||||
long maxTimestamp = 2;
|
||||
List<String> columns = generateRandomWords(20000, 8);
|
||||
|
||||
List<KeyValue> kvList = new ArrayList<KeyValue>();
|
||||
|
||||
Map<StringRange, List<KeyValue>> rangeMap = new HashMap<StringRange, List<KeyValue>>();
|
||||
|
||||
rangeMap.put(new StringRange(null, true, "b", false),
|
||||
new ArrayList<KeyValue>());
|
||||
rangeMap.put(new StringRange("p", true, "q", false),
|
||||
new ArrayList<KeyValue>());
|
||||
rangeMap.put(new StringRange("r", false, "s", true),
|
||||
new ArrayList<KeyValue>());
|
||||
rangeMap.put(new StringRange("z", false, null, false),
|
||||
new ArrayList<KeyValue>());
|
||||
String valueString = "ValueString";
|
||||
|
||||
for (String row : rows) {
|
||||
Put p = new Put(Bytes.toBytes(row));
|
||||
for (String column : columns) {
|
||||
for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) {
|
||||
KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp,
|
||||
valueString);
|
||||
p.add(kv);
|
||||
kvList.add(kv);
|
||||
for (StringRange s : rangeMap.keySet()) {
|
||||
if (s.inRange(column)) {
|
||||
rangeMap.get(s).add(kv);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ht.put(p);
|
||||
}
|
||||
|
||||
TEST_UTIL.flush();
|
||||
try {
|
||||
Thread.sleep(3000);
|
||||
} catch (InterruptedException i) {
|
||||
// ignore
|
||||
}
|
||||
|
||||
ColumnRangeFilter filter;
|
||||
Scan scan = new Scan();
|
||||
scan.setMaxVersions();
|
||||
for (StringRange s : rangeMap.keySet()) {
|
||||
filter = new ColumnRangeFilter(s.getStart() == null ? null
|
||||
: Bytes.toBytes(s.getStart()), s.isStartInclusive(),
|
||||
s.getEnd() == null ? null : Bytes.toBytes(s.getEnd()),
|
||||
s.isEndInclusive());
|
||||
scan.setFilter(filter);
|
||||
ResultScanner scanner = ht.getScanner(scan);
|
||||
List<KeyValue> results = new ArrayList<KeyValue>();
|
||||
LOG.info("scan column range: " + s.toString());
|
||||
long timeBeforeScan = System.currentTimeMillis();
|
||||
|
||||
Result result;
|
||||
while ((result = scanner.next()) != null) {
|
||||
for (KeyValue kv : result.list()) {
|
||||
results.add(kv);
|
||||
}
|
||||
}
|
||||
long scanTime = System.currentTimeMillis() - timeBeforeScan;
|
||||
LOG.info("scan time = " + scanTime + "ms");
|
||||
LOG.info("found " + results.size() + " results");
|
||||
LOG.info("Expecting " + rangeMap.get(s).size() + " results");
|
||||
|
||||
/*
|
||||
for (KeyValue kv : results) {
|
||||
LOG.info("found row " + Bytes.toString(kv.getRow()) + ", column "
|
||||
+ Bytes.toString(kv.getQualifier()));
|
||||
}
|
||||
*/
|
||||
|
||||
assertEquals(rangeMap.get(s).size(), results.size());
|
||||
}
|
||||
}
|
||||
|
||||
List<String> generateRandomWords(int numberOfWords, int maxLengthOfWords) {
|
||||
Set<String> wordSet = new HashSet<String>();
|
||||
for (int i = 0; i < numberOfWords; i++) {
|
||||
int lengthOfWords = (int) (Math.random() * maxLengthOfWords) + 1;
|
||||
char[] wordChar = new char[lengthOfWords];
|
||||
for (int j = 0; j < wordChar.length; j++) {
|
||||
wordChar[j] = (char) (Math.random() * 26 + 97);
|
||||
}
|
||||
String word = new String(wordChar);
|
||||
wordSet.add(word);
|
||||
}
|
||||
List<String> wordList = new ArrayList<String>(wordSet);
|
||||
return wordList;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue