From ac0b3bb5477612cb8844c4ef10fa2be0f1d1a025 Mon Sep 17 00:00:00 2001 From: huzheng Date: Thu, 13 Dec 2018 15:04:12 +0800 Subject: [PATCH] HBASE-21520 TestMultiColumnScanner cost long time when using ROWCOL bloom type --- .../regionserver/TestMultiColumnScanner.java | 94 ++++++------------- ...umnScannerWithAlgoGZAndNoDataEncoding.java | 48 ++++++++++ ...mnScannerWithAlgoGZAndUseDataEncoding.java | 48 ++++++++++ ...olumnScannerWithNoneAndNoDataEncoding.java | 48 ++++++++++ ...lumnScannerWithNoneAndUseDataEncoding.java | 48 ++++++++++ 5 files changed, 219 insertions(+), 67 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java index 2ff0d8c24a5..bb97c9c131a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java @@ -32,11 +32,9 @@ import java.util.Map; import java.util.Random; import java.util.Set; import java.util.TreeSet; -import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparatorImpl; import org.apache.hadoop.hbase.CellUtil; -import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import 
org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.KeyValue; @@ -47,29 +45,27 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; -import org.apache.hadoop.hbase.testclassification.MediumTests; -import org.apache.hadoop.hbase.testclassification.RegionServerTests; import org.apache.hadoop.hbase.util.BloomFilterUtil; import org.apache.hadoop.hbase.util.Bytes; -import org.junit.ClassRule; import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; +import org.junit.runners.Parameterized.Parameter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Tests optimized scanning of multiple columns. + * Tests optimized scanning of multiple columns.
+ * The big test is split into several subclass tests because, with the ROWCOL bloom type, the + * row-col bloom filter is checked (to save an HDFS seek) every time a scan switches from one + * column to another. That is CPU intensive (~45s for each case), so the ROWCOL case was moved + * into separate LargeTests to avoid timeout failures.
+ *
+ * To be clear: In TestMultiColumnScanner, we will flush 10 (NUM_FLUSHES=10) HFiles here, and the + * table will put ~1000 cells (rows=20, ts=6, qualifiers=8, total=20*6*8 ~ 1000) . Each full table + * scan will check the ROWCOL bloom filter 20 (rows)* 8 (column) * 10 (hfiles)= 1600 times, beside + * it will scan the full table 6*2^8=1536 times, so finally will have 1600*1536=2457600 bloom filter + * testing. (See HBASE-21520) */ -@RunWith(Parameterized.class) -@Category({RegionServerTests.class, MediumTests.class}) -public class TestMultiColumnScanner { - - @ClassRule - public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestMultiColumnScanner.class); +public abstract class TestMultiColumnScanner { private static final Logger LOG = LoggerFactory.getLogger(TestMultiColumnScanner.class); @@ -104,20 +100,19 @@ public class TestMultiColumnScanner { /** The probability that a column is skipped in a store file. */ private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7; - /** The probability of skipping a column in a single row */ - private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1; - - /** The probability of skipping a column everywhere */ - private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1; - /** The probability to delete a row/column pair */ private static final double DELETE_PROBABILITY = 0.02; private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU(); - private final Compression.Algorithm comprAlgo; - private final BloomType bloomType; - private final DataBlockEncoding dataBlockEncoding; + @Parameter(0) + public Compression.Algorithm comprAlgo; + + @Parameter(1) + public BloomType bloomType; + + @Parameter(2) + public DataBlockEncoding dataBlockEncoding; // Some static sanity-checking. 
static { @@ -128,27 +123,17 @@ public class TestMultiColumnScanner { assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]); } - @Parameters - public static final Collection parameters() { + public static Collection generateParams(Compression.Algorithm algo, + boolean useDataBlockEncoding) { List parameters = new ArrayList<>(); - for (Object[] bloomAndCompressionParams : - HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) { - for (boolean useDataBlockEncoding : new boolean[]{false, true}) { - parameters.add(ArrayUtils.add(bloomAndCompressionParams, - useDataBlockEncoding)); - } + for (BloomType bloomType : BloomType.values()) { + DataBlockEncoding dataBlockEncoding = + useDataBlockEncoding ? DataBlockEncoding.PREFIX : DataBlockEncoding.NONE; + parameters.add(new Object[] { algo, bloomType, dataBlockEncoding }); } return parameters; } - public TestMultiColumnScanner(Compression.Algorithm comprAlgo, - BloomType bloomType, boolean useDataBlockEncoding) { - this.comprAlgo = comprAlgo; - this.bloomType = bloomType; - this.dataBlockEncoding = useDataBlockEncoding ? DataBlockEncoding.PREFIX : - DataBlockEncoding.NONE; - } - @Test public void testMultiColumnScanner() throws IOException { TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10); @@ -170,24 +155,6 @@ public class TestMultiColumnScanner { Map lastDelTimeMap = new HashMap<>(); Random rand = new Random(29372937L); - Set rowQualSkip = new HashSet<>(); - - // Skip some columns in some rows. We need to test scanning over a set - // of columns when some of the columns are not there. - for (String row : rows) - for (String qual : qualifiers) - if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) { - LOG.info("Skipping " + qual + " in row " + row); - rowQualSkip.add(rowQualKey(row, qual)); - } - - // Also skip some columns in all rows. 
- for (String qual : qualifiers) - if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) { - LOG.info("Skipping " + qual + " in all rows"); - for (String row : rows) - rowQualSkip.add(rowQualKey(row, qual)); - } for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) { for (String qual : qualifiers) { @@ -316,10 +283,6 @@ public class TestMultiColumnScanner { kv.getQualifierLength()); } - private static String rowQualKey(String row, String qual) { - return row + "_" + qual; - } - static String createValue(String row, String qual, long ts) { return "value_for_" + row + "_" + qual + "_" + ts; } @@ -339,10 +302,7 @@ public class TestMultiColumnScanner { lst.add(sb.toString()); } - return lst; } - - } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java new file mode 100644 index 00000000000..cc68c115390 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.Collection; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.junit.ClassRule; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test case for Compression.Algorithm.GZ and no use data block encoding. + * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner + */ +@RunWith(Parameterized.class) +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestMultiColumnScannerWithAlgoGZAndNoDataEncoding extends TestMultiColumnScanner { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.class); + + @Parameters + public static Collection parameters() { + return TestMultiColumnScanner.generateParams(Algorithm.GZ, false); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java new file mode 100644 index 00000000000..c817da257ec --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.Collection; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.junit.ClassRule; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test case for Compression.Algorithm.GZ and use data block encoding. 
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner + */ +@RunWith(Parameterized.class) +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestMultiColumnScannerWithAlgoGZAndUseDataEncoding extends TestMultiColumnScanner { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.class); + + @Parameters + public static Collection parameters() { + return TestMultiColumnScanner.generateParams(Algorithm.GZ, true); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java new file mode 100644 index 00000000000..4f6aa90b8c5 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.Collection; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.junit.ClassRule; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test case for Compression.Algorithm.NONE and no use data block encoding. + * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner + */ +@RunWith(Parameterized.class) +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestMultiColumnScannerWithNoneAndNoDataEncoding extends TestMultiColumnScanner { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestMultiColumnScannerWithNoneAndNoDataEncoding.class); + + @Parameters + public static Collection parameters() { + return TestMultiColumnScanner.generateParams(Algorithm.NONE, false); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java new file mode 100644 index 00000000000..f1fd30d41cf --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.Collection; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.junit.ClassRule; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test case for Compression.Algorithm.NONE with data block encoding (PREFIX) enabled. + * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner + */ +@RunWith(Parameterized.class) +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestMultiColumnScannerWithNoneAndUseDataEncoding extends TestMultiColumnScanner { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestMultiColumnScannerWithNoneAndUseDataEncoding.class); + + @Parameters + public static Collection parameters() { + return TestMultiColumnScanner.generateParams(Algorithm.NONE, true); + } +}