From ac0b3bb5477612cb8844c4ef10fa2be0f1d1a025 Mon Sep 17 00:00:00 2001
From: huzheng <openinx@gmail.com>
Date: Thu, 13 Dec 2018 15:04:12 +0800
Subject: [PATCH] HBASE-21520 TestMultiColumnScanner cost long time when using
 ROWCOL bloom type

---
 .../regionserver/TestMultiColumnScanner.java  | 94 ++++++-------------
 ...umnScannerWithAlgoGZAndNoDataEncoding.java | 48 ++++++++++
 ...mnScannerWithAlgoGZAndUseDataEncoding.java | 48 ++++++++++
 ...olumnScannerWithNoneAndNoDataEncoding.java | 48 ++++++++++
 ...lumnScannerWithNoneAndUseDataEncoding.java | 48 ++++++++++
 5 files changed, 219 insertions(+), 67 deletions(-)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java
index 2ff0d8c24a5..bb97c9c131a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java
@@ -32,11 +32,9 @@ import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
-import org.apache.commons.lang3.ArrayUtils;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparatorImpl;
 import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
@@ -47,29 +45,27 @@ import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.util.BloomFilterUtil;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.ClassRule;
 import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
+import org.junit.runners.Parameterized.Parameter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Tests optimized scanning of multiple columns.
+ * Tests optimized scanning of multiple columns. <br>
+ * We separated the big test into several sub-class UT, because When in ROWCOL bloom type, we will
+ * test the row-col bloom filter frequently for saving HDFS seek once we switch from one column to
+ * another in our UT. It's cpu time consuming (~45s for each case), so moved the ROWCOL case into a
+ * separated LargeTests to avoid timeout failure. <br>
+ * <br>
+ * To be clear: In TestMultiColumnScanner, we will flush 10 (NUM_FLUSHES=10) HFiles here, and the
+ * table will put ~1000 cells (rows=20, ts=6, qualifiers=8, total=20*6*8 ~ 1000) . Each full table
+ * scan will check the ROWCOL bloom filter 20 (rows)* 8 (column) * 10 (hfiles)= 1600 times, beside
+ * it will scan the full table 6*2^8=1536 times, so finally will have 1600*1536=2457600 bloom filter
+ * testing. (See HBASE-21520)
  */
-@RunWith(Parameterized.class)
-@Category({RegionServerTests.class, MediumTests.class})
-public class TestMultiColumnScanner {
-
-  @ClassRule
-  public static final HBaseClassTestRule CLASS_RULE =
-      HBaseClassTestRule.forClass(TestMultiColumnScanner.class);
+public abstract class TestMultiColumnScanner {
 
   private static final Logger LOG = LoggerFactory.getLogger(TestMultiColumnScanner.class);
 
@@ -104,20 +100,19 @@ public class TestMultiColumnScanner {
   /** The probability that a column is skipped in a store file. */
   private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;
 
-  /** The probability of skipping a column in a single row */
-  private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1;
-
-  /** The probability of skipping a column everywhere */
-  private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1;
-
   /** The probability to delete a row/column pair */
   private static final double DELETE_PROBABILITY = 0.02;
 
   private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
 
-  private final Compression.Algorithm comprAlgo;
-  private final BloomType bloomType;
-  private final DataBlockEncoding dataBlockEncoding;
+  @Parameter(0)
+  public Compression.Algorithm comprAlgo;
+
+  @Parameter(1)
+  public BloomType bloomType;
+
+  @Parameter(2)
+  public DataBlockEncoding dataBlockEncoding;
 
   // Some static sanity-checking.
   static {
@@ -128,27 +123,17 @@ public class TestMultiColumnScanner {
       assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
   }
 
-  @Parameters
-  public static final Collection<Object[]> parameters() {
+  public static Collection<Object[]> generateParams(Compression.Algorithm algo,
+      boolean useDataBlockEncoding) {
     List<Object[]> parameters = new ArrayList<>();
-    for (Object[] bloomAndCompressionParams :
-        HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) {
-      for (boolean useDataBlockEncoding : new boolean[]{false, true}) {
-        parameters.add(ArrayUtils.add(bloomAndCompressionParams,
-            useDataBlockEncoding));
-      }
+    for (BloomType bloomType : BloomType.values()) {
+      DataBlockEncoding dataBlockEncoding =
+          useDataBlockEncoding ? DataBlockEncoding.PREFIX : DataBlockEncoding.NONE;
+      parameters.add(new Object[] { algo, bloomType, dataBlockEncoding });
     }
     return parameters;
   }
 
-  public TestMultiColumnScanner(Compression.Algorithm comprAlgo,
-      BloomType bloomType, boolean useDataBlockEncoding) {
-    this.comprAlgo = comprAlgo;
-    this.bloomType = bloomType;
-    this.dataBlockEncoding = useDataBlockEncoding ? DataBlockEncoding.PREFIX :
-        DataBlockEncoding.NONE;
-  }
-
   @Test
   public void testMultiColumnScanner() throws IOException {
     TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
@@ -170,24 +155,6 @@ public class TestMultiColumnScanner {
     Map<String, Long> lastDelTimeMap = new HashMap<>();
 
     Random rand = new Random(29372937L);
-    Set<String> rowQualSkip = new HashSet<>();
-
-    // Skip some columns in some rows. We need to test scanning over a set
-    // of columns when some of the columns are not there.
-    for (String row : rows)
-      for (String qual : qualifiers)
-        if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) {
-          LOG.info("Skipping " + qual + " in row " + row);
-          rowQualSkip.add(rowQualKey(row, qual));
-        }
-
-    // Also skip some columns in all rows.
-    for (String qual : qualifiers)
-      if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) {
-        LOG.info("Skipping " + qual + " in all rows");
-        for (String row : rows)
-          rowQualSkip.add(rowQualKey(row, qual));
-      }
 
     for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
       for (String qual : qualifiers) {
@@ -316,10 +283,6 @@ public class TestMultiColumnScanner {
         kv.getQualifierLength());
   }
 
-  private static String rowQualKey(String row, String qual) {
-    return row + "_" + qual;
-  }
-
   static String createValue(String row, String qual, long ts) {
     return "value_for_" + row + "_" + qual + "_" + ts;
   }
@@ -339,10 +302,7 @@ public class TestMultiColumnScanner {
 
       lst.add(sb.toString());
     }
-
     return lst;
   }
-
-
 }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java
new file mode 100644
index 00000000000..cc68c115390
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.GZ and no use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithAlgoGZAndNoDataEncoding extends TestMultiColumnScanner {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.class);
+
+  @Parameters
+  public static Collection<Object[]> parameters() {
+    return TestMultiColumnScanner.generateParams(Algorithm.GZ, false);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java
new file mode 100644
index 00000000000..c817da257ec
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.GZ and use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithAlgoGZAndUseDataEncoding extends TestMultiColumnScanner {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.class);
+
+  @Parameters
+  public static Collection<Object[]> parameters() {
+    return TestMultiColumnScanner.generateParams(Algorithm.GZ, true);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java
new file mode 100644
index 00000000000..4f6aa90b8c5
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.NONE and no use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithNoneAndNoDataEncoding extends TestMultiColumnScanner {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMultiColumnScannerWithNoneAndNoDataEncoding.class);
+
+  @Parameters
+  public static Collection<Object[]> parameters() {
+    return TestMultiColumnScanner.generateParams(Algorithm.NONE, false);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java
new file mode 100644
index 00000000000..f1fd30d41cf
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.NONE and no use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithNoneAndUseDataEncoding extends TestMultiColumnScanner {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMultiColumnScannerWithNoneAndUseDataEncoding.class);
+
+  @Parameters
+  public static Collection<Object[]> parameters() {
+    return TestMultiColumnScanner.generateParams(Algorithm.NONE, true);
+  }
+}