diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java index f89098a8f3b..5a491723bd3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java @@ -34,9 +34,6 @@ import java.util.Random; import java.util.Set; import java.util.TreeSet; -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; @@ -49,22 +46,28 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; -import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; +import org.junit.runners.Parameterized.Parameter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** - * Tests optimized scanning of multiple columns. + * Tests optimized scanning of multiple columns.
+ * We separated the big test into several sub-class UTs, because when using the ROWCOL bloom type, we will + * test the row-col bloom filter frequently to save an HDFS seek once we switch from one column to + * another in our UT. It's CPU time consuming (~45s for each case), so we moved the ROWCOL case into a + * separate LargeTests class to avoid timeout failure.
+ *
+ * To be clear: In TestMultiColumnScanner, we will flush 10 (NUM_FLUSHES=10) HFiles here, and the + * table will put ~1000 cells (rows=20, ts=6, qualifiers=8, total=20*6*8 ~ 1000). Each full table + * scan will check the ROWCOL bloom filter 20 (rows) * 8 (columns) * 10 (hfiles) = 1600 times; besides, + * it will scan the full table 6*2^8=1536 times, so finally we will have 1600*1536=2457600 bloom filter + * tests. (See HBASE-21520)
static { @@ -121,27 +123,17 @@ public class TestMultiColumnScanner { assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]); } - @Parameters - public static final Collection parameters() { - List parameters = new ArrayList(); - for (Object[] bloomAndCompressionParams : - HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) { - for (boolean useDataBlockEncoding : new boolean[]{false, true}) { - parameters.add(ArrayUtils.add(bloomAndCompressionParams, - useDataBlockEncoding)); - } + public static Collection generateParams(Compression.Algorithm algo, + boolean useDataBlockEncoding) { + List parameters = new ArrayList<>(); + for (BloomType bloomType : BloomType.values()) { + DataBlockEncoding dataBlockEncoding = + useDataBlockEncoding ? DataBlockEncoding.PREFIX : DataBlockEncoding.NONE; + parameters.add(new Object[] { algo, bloomType, dataBlockEncoding }); } return parameters; } - public TestMultiColumnScanner(Compression.Algorithm comprAlgo, - BloomType bloomType, boolean useDataBlockEncoding) { - this.comprAlgo = comprAlgo; - this.bloomType = bloomType; - this.dataBlockEncoding = useDataBlockEncoding ? DataBlockEncoding.PREFIX : - DataBlockEncoding.NONE; - } - @Test public void testMultiColumnScanner() throws IOException { HRegion region = TEST_UTIL.createTestRegion(TABLE_NAME, @@ -161,24 +153,6 @@ public class TestMultiColumnScanner { Map lastDelTimeMap = new HashMap(); Random rand = new Random(29372937L); - Set rowQualSkip = new HashSet(); - - // Skip some columns in some rows. We need to test scanning over a set - // of columns when some of the columns are not there. - for (String row : rows) - for (String qual : qualifiers) - if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) { - LOG.info("Skipping " + qual + " in row " + row); - rowQualSkip.add(rowQualKey(row, qual)); - } - - // Also skip some columns in all rows. 
- for (String qual : qualifiers) - if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) { - LOG.info("Skipping " + qual + " in all rows"); - for (String row : rows) - rowQualSkip.add(rowQualKey(row, qual)); - } for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) { for (String qual : qualifiers) { @@ -307,10 +281,6 @@ public class TestMultiColumnScanner { kv.getQualifierLength()); } - private static String rowQualKey(String row, String qual) { - return row + "_" + qual; - } - static String createValue(String row, String qual, long ts) { return "value_for_" + row + "_" + qual + "_" + ts; } @@ -330,10 +300,7 @@ public class TestMultiColumnScanner { lst.add(sb.toString()); } - return lst; } - - } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java new file mode 100644 index 00000000000..f9a41089503 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.Collection; + +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test case for Compression.Algorithm.GZ and no use data block encoding. + * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner + */ +@RunWith(Parameterized.class) +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestMultiColumnScannerWithAlgoGZAndNoDataEncoding extends TestMultiColumnScanner { + + @Parameters + public static Collection parameters() { + return TestMultiColumnScanner.generateParams(Algorithm.GZ, false); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java new file mode 100644 index 00000000000..2cecde99f5a --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.Collection; + +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test case for Compression.Algorithm.GZ and use data block encoding. + * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner + */ +@RunWith(Parameterized.class) +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestMultiColumnScannerWithAlgoGZAndUseDataEncoding extends TestMultiColumnScanner { + + @Parameters + public static Collection parameters() { + return TestMultiColumnScanner.generateParams(Algorithm.GZ, true); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java new file mode 100644 index 00000000000..ef23d3517c8 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.Collection; + +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test case for Compression.Algorithm.NONE and no use data block encoding. 
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner + */ +@RunWith(Parameterized.class) +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestMultiColumnScannerWithNoneAndNoDataEncoding extends TestMultiColumnScanner { + + @Parameters + public static Collection parameters() { + return TestMultiColumnScanner.generateParams(Algorithm.NONE, false); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java new file mode 100644 index 00000000000..55fd01c751c --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.Collection; + +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Test case for Compression.Algorithm.NONE and use data block encoding. + * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner + */ +@RunWith(Parameterized.class) +@Category({ RegionServerTests.class, LargeTests.class }) +public class TestMultiColumnScannerWithNoneAndUseDataEncoding extends TestMultiColumnScanner { + + @Parameters + public static Collection parameters() { + return TestMultiColumnScanner.generateParams(Algorithm.NONE, true); + } +}