HBASE-21520 TestMultiColumnScanner cost long time when using ROWCOL bloom type

2018-12-13 15:04:12 +08:00 · 2018-12-13 15:04:12 +08:00 · ac0b3bb547
parent 491153488e
commit ac0b3bb547
5 changed files with 219 additions and 67 deletions
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java
@ -32,11 +32,9 @@ import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
-import org.apache.commons.lang3.ArrayUtils;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparatorImpl;
 import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
@ -47,29 +45,27 @@ import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.util.BloomFilterUtil;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.ClassRule;
 import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
+import org.junit.runners.Parameterized.Parameter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 /**
- * Tests optimized scanning of multiple columns.
+ * Tests optimized scanning of multiple columns. <br>
+ * We separated the big test into several sub-class UT, because When in ROWCOL bloom type, we will
+ * test the row-col bloom filter frequently for saving HDFS seek once we switch from one column to
+ * another in our UT. It's cpu time consuming (~45s for each case), so moved the ROWCOL case into a
+ * separated LargeTests to avoid timeout failure. <br>
+ * <br>
+ * To be clear: In TestMultiColumnScanner, we will flush 10 (NUM_FLUSHES=10) HFiles here, and the
+ * table will put ~1000 cells (rows=20, ts=6, qualifiers=8, total=20*6*8 ~ 1000) . Each full table
+ * scan will check the ROWCOL bloom filter 20 (rows)* 8 (column) * 10 (hfiles)= 1600 times, beside
+ * it will scan the full table 6*2^8=1536 times, so finally will have 1600*1536=2457600 bloom filter
+ * testing. (See HBASE-21520)
 */
-@RunWith(Parameterized.class)
-@Category({RegionServerTests.class, MediumTests.class})
-public class TestMultiColumnScanner {
-
-  @ClassRule
-  public static final HBaseClassTestRule CLASS_RULE =
-      HBaseClassTestRule.forClass(TestMultiColumnScanner.class);
+public abstract class TestMultiColumnScanner {

  private static final Logger LOG = LoggerFactory.getLogger(TestMultiColumnScanner.class);

@ -104,20 +100,19 @@ public class TestMultiColumnScanner {
  /** The probability that a column is skipped in a store file. */
  private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;

-  /** The probability of skipping a column in a single row */
-  private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1;
-
-  /** The probability of skipping a column everywhere */
-  private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1;
-
  /** The probability to delete a row/column pair */
  private static final double DELETE_PROBABILITY = 0.02;

  private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();

-  private final Compression.Algorithm comprAlgo;
-  private final BloomType bloomType;
-  private final DataBlockEncoding dataBlockEncoding;
+  @Parameter(0)
+  public Compression.Algorithm comprAlgo;
+
+  @Parameter(1)
+  public BloomType bloomType;
+
+  @Parameter(2)
+  public DataBlockEncoding dataBlockEncoding;

  // Some static sanity-checking.
  static {
@ -128,27 +123,17 @@ public class TestMultiColumnScanner {
      assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
  }

-  @Parameters
-  public static final Collection<Object[]> parameters() {
+  public static Collection<Object[]> generateParams(Compression.Algorithm algo,
+      boolean useDataBlockEncoding) {
    List<Object[]> parameters = new ArrayList<>();
-    for (Object[] bloomAndCompressionParams :
-        HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) {
-      for (boolean useDataBlockEncoding : new boolean[]{false, true}) {
-        parameters.add(ArrayUtils.add(bloomAndCompressionParams,
-            useDataBlockEncoding));
-      }
+    for (BloomType bloomType : BloomType.values()) {
+      DataBlockEncoding dataBlockEncoding =
+          useDataBlockEncoding ? DataBlockEncoding.PREFIX : DataBlockEncoding.NONE;
+      parameters.add(new Object[] { algo, bloomType, dataBlockEncoding });
    }
    return parameters;
  }

-  public TestMultiColumnScanner(Compression.Algorithm comprAlgo,
-      BloomType bloomType, boolean useDataBlockEncoding) {
-    this.comprAlgo = comprAlgo;
-    this.bloomType = bloomType;
-    this.dataBlockEncoding = useDataBlockEncoding ? DataBlockEncoding.PREFIX :
-        DataBlockEncoding.NONE;
-  }
-
  @Test
  public void testMultiColumnScanner() throws IOException {
    TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
@ -170,24 +155,6 @@ public class TestMultiColumnScanner {
    Map<String, Long> lastDelTimeMap = new HashMap<>();

    Random rand = new Random(29372937L);
-    Set<String> rowQualSkip = new HashSet<>();
-
-    // Skip some columns in some rows. We need to test scanning over a set
-    // of columns when some of the columns are not there.
-    for (String row : rows)
-      for (String qual : qualifiers)
-        if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) {
-          LOG.info("Skipping " + qual + " in row " + row);
-          rowQualSkip.add(rowQualKey(row, qual));
-        }
-
-    // Also skip some columns in all rows.
-    for (String qual : qualifiers)
-      if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) {
-        LOG.info("Skipping " + qual + " in all rows");
-        for (String row : rows)
-          rowQualSkip.add(rowQualKey(row, qual));
-      }

    for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
      for (String qual : qualifiers) {
@ -316,10 +283,6 @@ public class TestMultiColumnScanner {
        kv.getQualifierLength());
  }

-  private static String rowQualKey(String row, String qual) {
-    return row + "_" + qual;
-  }
-
  static String createValue(String row, String qual, long ts) {
    return "value_for_" + row + "_" + qual + "_" + ts;
  }
@ -339,10 +302,7 @@ public class TestMultiColumnScanner {

      lst.add(sb.toString());
    }
-
    return lst;
  }
-
-
 }

--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java
@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.GZ and no use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithAlgoGZAndNoDataEncoding extends TestMultiColumnScanner {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.class);
+
+  @Parameters
+  public static Collection<Object[]> parameters() {
+    return TestMultiColumnScanner.generateParams(Algorithm.GZ, false);
+  }
+}
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java
@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.GZ and use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithAlgoGZAndUseDataEncoding extends TestMultiColumnScanner {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.class);
+
+  @Parameters
+  public static Collection<Object[]> parameters() {
+    return TestMultiColumnScanner.generateParams(Algorithm.GZ, true);
+  }
+}
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java
@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.NONE and no use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithNoneAndNoDataEncoding extends TestMultiColumnScanner {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMultiColumnScannerWithNoneAndNoDataEncoding.class);
+
+  @Parameters
+  public static Collection<Object[]> parameters() {
+    return TestMultiColumnScanner.generateParams(Algorithm.NONE, false);
+  }
+}
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java
@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.NONE and no use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithNoneAndUseDataEncoding extends TestMultiColumnScanner {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMultiColumnScannerWithNoneAndUseDataEncoding.class);
+
+  @Parameters
+  public static Collection<Object[]> parameters() {
+    return TestMultiColumnScanner.generateParams(Algorithm.NONE, true);
+  }
+}