HBASE-21520 TestMultiColumnScanner cost long time when using ROWCOL bloom type

This commit is contained in:
huzheng 2018-12-13 15:04:12 +08:00
parent 5c8c945e34
commit 049307c9a7
5 changed files with 199 additions and 64 deletions

View File

@ -34,9 +34,6 @@ import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.CellUtil;
@ -49,22 +46,28 @@ import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test; import org.junit.Test;
import org.junit.experimental.categories.Category; import org.junit.runners.Parameterized.Parameter;
import org.junit.runner.RunWith; import org.slf4j.Logger;
import org.junit.runners.Parameterized; import org.slf4j.LoggerFactory;
import org.junit.runners.Parameterized.Parameters;
/** /**
* Tests optimized scanning of multiple columns. * Tests optimized scanning of multiple columns. <br>
* We separated the big test into several sub-class UT, because When in ROWCOL bloom type, we will
* test the row-col bloom filter frequently for saving HDFS seek once we switch from one column to
* another in our UT. It's cpu time consuming (~45s for each case), so moved the ROWCOL case into a
* separated LargeTests to avoid timeout failure. <br>
* <br>
* To be clear: In TestMultiColumnScanner, we will flush 10 (NUM_FLUSHES=10) HFiles here, and the
* table will put ~1000 cells (rows=20, ts=6, qualifiers=8, total=20*6*8 ~ 1000) . Each full table
scan will check the ROWCOL bloom filter 20 (rows) * 8 (columns) * 10 (hfiles) = 1600 times; besides,
it will scan the full table 6*2^8=1536 times, so in total there will be 1600*1536=2457600 bloom filter
checks. (See HBASE-21520)
*/ */
@RunWith(Parameterized.class) public abstract class TestMultiColumnScanner {
@Category(MediumTests.class)
public class TestMultiColumnScanner {
private static final Log LOG = LogFactory.getLog(TestMultiColumnScanner.class); private static final Logger LOG = LoggerFactory.getLogger(TestMultiColumnScanner.class);
private static final String TABLE_NAME = private static final String TABLE_NAME =
TestMultiColumnScanner.class.getSimpleName(); TestMultiColumnScanner.class.getSimpleName();
@ -97,20 +100,19 @@ public class TestMultiColumnScanner {
/** The probability that a column is skipped in a store file. */ /** The probability that a column is skipped in a store file. */
private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7; private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;
/** The probability of skipping a column in a single row */
private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1;
/** The probability of skipping a column everywhere */
private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1;
/** The probability to delete a row/column pair */ /** The probability to delete a row/column pair */
private static final double DELETE_PROBABILITY = 0.02; private static final double DELETE_PROBABILITY = 0.02;
private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU(); private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
private final Compression.Algorithm comprAlgo; @Parameter(0)
private final BloomType bloomType; public Compression.Algorithm comprAlgo;
private final DataBlockEncoding dataBlockEncoding;
@Parameter(1)
public BloomType bloomType;
@Parameter(2)
public DataBlockEncoding dataBlockEncoding;
// Some static sanity-checking. // Some static sanity-checking.
static { static {
@ -121,27 +123,17 @@ public class TestMultiColumnScanner {
assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]); assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
} }
@Parameters public static Collection<Object[]> generateParams(Compression.Algorithm algo,
public static final Collection<Object[]> parameters() { boolean useDataBlockEncoding) {
List<Object[]> parameters = new ArrayList<Object[]>(); List<Object[]> parameters = new ArrayList<>();
for (Object[] bloomAndCompressionParams : for (BloomType bloomType : BloomType.values()) {
HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) { DataBlockEncoding dataBlockEncoding =
for (boolean useDataBlockEncoding : new boolean[]{false, true}) { useDataBlockEncoding ? DataBlockEncoding.PREFIX : DataBlockEncoding.NONE;
parameters.add(ArrayUtils.add(bloomAndCompressionParams, parameters.add(new Object[] { algo, bloomType, dataBlockEncoding });
useDataBlockEncoding));
}
} }
return parameters; return parameters;
} }
public TestMultiColumnScanner(Compression.Algorithm comprAlgo,
BloomType bloomType, boolean useDataBlockEncoding) {
this.comprAlgo = comprAlgo;
this.bloomType = bloomType;
this.dataBlockEncoding = useDataBlockEncoding ? DataBlockEncoding.PREFIX :
DataBlockEncoding.NONE;
}
@Test @Test
public void testMultiColumnScanner() throws IOException { public void testMultiColumnScanner() throws IOException {
HRegion region = TEST_UTIL.createTestRegion(TABLE_NAME, HRegion region = TEST_UTIL.createTestRegion(TABLE_NAME,
@ -161,24 +153,6 @@ public class TestMultiColumnScanner {
Map<String, Long> lastDelTimeMap = new HashMap<String, Long>(); Map<String, Long> lastDelTimeMap = new HashMap<String, Long>();
Random rand = new Random(29372937L); Random rand = new Random(29372937L);
Set<String> rowQualSkip = new HashSet<String>();
// Skip some columns in some rows. We need to test scanning over a set
// of columns when some of the columns are not there.
for (String row : rows)
for (String qual : qualifiers)
if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) {
LOG.info("Skipping " + qual + " in row " + row);
rowQualSkip.add(rowQualKey(row, qual));
}
// Also skip some columns in all rows.
for (String qual : qualifiers)
if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) {
LOG.info("Skipping " + qual + " in all rows");
for (String row : rows)
rowQualSkip.add(rowQualKey(row, qual));
}
for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) { for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
for (String qual : qualifiers) { for (String qual : qualifiers) {
@ -307,10 +281,6 @@ public class TestMultiColumnScanner {
kv.getQualifierLength()); kv.getQualifierLength());
} }
private static String rowQualKey(String row, String qual) {
return row + "_" + qual;
}
static String createValue(String row, String qual, long ts) { static String createValue(String row, String qual, long ts) {
return "value_for_" + row + "_" + qual + "_" + ts; return "value_for_" + row + "_" + qual + "_" + ts;
} }
@ -330,10 +300,7 @@ public class TestMultiColumnScanner {
lst.add(sb.toString()); lst.add(sb.toString());
} }
return lst; return lst;
} }
} }

View File

@ -0,0 +1,42 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.util.Collection;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/**
 * Runs {@link TestMultiColumnScanner} with GZ compression and data block encoding disabled.
 * Split out as its own LargeTests class so the expensive ROWCOL bloom-filter cases do not
 * push a single test class past the timeout (HBASE-21520).
 * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
 */
@RunWith(Parameterized.class)
@Category({ RegionServerTests.class, LargeTests.class })
public class TestMultiColumnScannerWithAlgoGZAndNoDataEncoding extends TestMultiColumnScanner {

  @Parameters
  public static Collection<Object[]> parameters() {
    // generateParams is a static member inherited from the parent, so no qualifier is needed;
    // GZ compression, data block encoding off.
    return generateParams(Algorithm.GZ, false);
  }
}

View File

@ -0,0 +1,42 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.util.Collection;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/**
 * Runs {@link TestMultiColumnScanner} with GZ compression and data block encoding enabled.
 * Split out as its own LargeTests class so the expensive ROWCOL bloom-filter cases do not
 * push a single test class past the timeout (HBASE-21520).
 * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
 */
@RunWith(Parameterized.class)
@Category({ RegionServerTests.class, LargeTests.class })
public class TestMultiColumnScannerWithAlgoGZAndUseDataEncoding extends TestMultiColumnScanner {

  @Parameters
  public static Collection<Object[]> parameters() {
    // generateParams is a static member inherited from the parent, so no qualifier is needed;
    // GZ compression, data block encoding on.
    return generateParams(Algorithm.GZ, true);
  }
}

View File

@ -0,0 +1,42 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.util.Collection;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/**
 * Runs {@link TestMultiColumnScanner} with no compression and data block encoding disabled.
 * Split out as its own LargeTests class so the expensive ROWCOL bloom-filter cases do not
 * push a single test class past the timeout (HBASE-21520).
 * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
 */
@RunWith(Parameterized.class)
@Category({ RegionServerTests.class, LargeTests.class })
public class TestMultiColumnScannerWithNoneAndNoDataEncoding extends TestMultiColumnScanner {

  @Parameters
  public static Collection<Object[]> parameters() {
    // generateParams is a static member inherited from the parent, so no qualifier is needed;
    // no compression, data block encoding off.
    return generateParams(Algorithm.NONE, false);
  }
}

View File

@ -0,0 +1,42 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.util.Collection;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/**
 * Runs {@link TestMultiColumnScanner} with no compression and data block encoding enabled.
 * (The class passes {@code useDataBlockEncoding = true}; the earlier Javadoc saying
 * "no use data block encoding" was a copy-paste error.)
 * Split out as its own LargeTests class so the expensive ROWCOL bloom-filter cases do not
 * push a single test class past the timeout (HBASE-21520).
 * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
 */
@RunWith(Parameterized.class)
@Category({ RegionServerTests.class, LargeTests.class })
public class TestMultiColumnScannerWithNoneAndUseDataEncoding extends TestMultiColumnScanner {

  @Parameters
  public static Collection<Object[]> parameters() {
    // generateParams is a static member inherited from the parent, so no qualifier is needed;
    // no compression, data block encoding on.
    return generateParams(Algorithm.NONE, true);
  }
}