HBASE-14283 Reverse scan doesn’t work with HFile inline index/bloom blocks

Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
Ben Lau 2015-10-07 16:46:16 -07:00 committed by Andrew Purtell
parent efb82957da
commit 5c56e239c3
2 changed files with 194 additions and 2 deletions

View File

@ -859,9 +859,12 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
reader.returnBlock(seekToBlock); reader.returnBlock(seekToBlock);
// It is important that we compute and pass onDiskSize to the block // It is important that we compute and pass onDiskSize to the block
// reader so that it does not have to read the header separately to // reader so that it does not have to read the header separately to
// figure out the size. // figure out the size. Currently, we do not have a way to do this
// correctly in the general case however.
// TODO: See https://issues.apache.org/jira/browse/HBASE-14576
int prevBlockSize = -1;
seekToBlock = reader.readBlock(previousBlockOffset, seekToBlock = reader.readBlock(previousBlockOffset,
seekToBlock.getOffset() - previousBlockOffset, cacheBlocks, prevBlockSize, cacheBlocks,
pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding()); pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
// TODO shortcut: seek forward in this block to the last key of the // TODO shortcut: seek forward in this block to the last key of the
// block. // block.

View File

@ -0,0 +1,189 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({IOTests.class, MediumTests.class})
public class TestSeekBeforeWithInlineBlocks {
private static final Log LOG = LogFactory.getLog(TestSeekBeforeWithInlineBlocks.class);
private static final HBaseTestingUtility TEST_UTIL =
new HBaseTestingUtility();
private static final int NUM_KV = 10000;
private static final int DATA_BLOCK_SIZE = 4096;
private static final int BLOOM_BLOCK_SIZE = 1024;
private static final int[] INDEX_CHUNK_SIZES = { 65536, 4096, 1024 };
private static final int[] EXPECTED_NUM_LEVELS = { 1, 2, 3 };
private static final Random RAND = new Random(192537);
private static final byte[] FAM = Bytes.toBytes("family");
private FileSystem fs;
private Configuration conf;
/**
* Scanner.seekBefore() could fail because when seeking to a previous HFile data block, it needs
* to know the size of that data block, which it calculates using current data block offset and
* the previous data block offset. This fails to work when there are leaf-level index blocks in
* the scannable section of the HFile, i.e. starting in HFileV2. This test will try seekBefore()
* on a flat (single-level) and multi-level (2,3) HFile and confirm this bug is now fixed. This
* bug also happens for inline Bloom blocks for the same reasons.
*/
@Test
public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
conf = TEST_UTIL.getConfiguration();
// Try out different HFile versions to ensure reverse scan works on each version
for (int hfileVersion = HFile.MIN_FORMAT_VERSION_WITH_TAGS;
hfileVersion <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {
conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
fs = HFileSystem.get(conf);
// Try out different bloom types because inline Bloom blocks break seekBefore()
for (BloomType bloomType : BloomType.values()) {
// Test out HFile block indices of various sizes/levels
for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; testI++) {
int indexBlockSize = INDEX_CHUNK_SIZES[testI];
int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
LOG.info(String.format("Testing HFileVersion: %s, BloomType: %s, Index Levels: %s",
hfileVersion, bloomType, expectedNumLevels));
conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);
Cell[] cells = new Cell[NUM_KV];
Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
String.format("testMultiIndexLevelRandomHFileWithBlooms-%s-%s-%s",
hfileVersion, bloomType, testI));
// Disable caching to prevent it from hiding any bugs in block seeks/reads
conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
CacheConfig cacheConf = new CacheConfig(conf);
// Write the HFile
{
HFileContext meta = new HFileContextBuilder()
.withBlockSize(DATA_BLOCK_SIZE)
.build();
StoreFile.Writer storeFileWriter =
new StoreFile.WriterBuilder(conf, cacheConf, fs)
.withFilePath(hfilePath)
.withFileContext(meta)
.withBloomType(bloomType)
.build();
for (int i = 0; i < NUM_KV; i++) {
byte[] row = TestHFileWriterV2.randomOrderedKey(RAND, i);
byte[] qual = TestHFileWriterV2.randomRowOrQualifier(RAND);
byte[] value = TestHFileWriterV2.randomValue(RAND);
KeyValue kv = new KeyValue(row, FAM, qual, value);
storeFileWriter.append(kv);
cells[i] = kv;
}
storeFileWriter.close();
}
// Read the HFile
HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, conf);
// Sanity check the HFile index level
assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());
// Check that we can seekBefore in either direction and with both pread
// enabled and disabled
for (boolean pread : new boolean[] { false, true }) {
HFileScanner scanner = reader.getScanner(true, pread);
checkNoSeekBefore(cells, scanner, 0);
for (int i = 1; i < NUM_KV; i++) {
checkSeekBefore(cells, scanner, i);
checkCell(cells[i-1], scanner.getCell());
}
assertTrue(scanner.seekTo());
for (int i = NUM_KV - 1; i >= 1; i--) {
checkSeekBefore(cells, scanner, i);
checkCell(cells[i-1], scanner.getCell());
}
checkNoSeekBefore(cells, scanner, 0);
scanner.close();
}
reader.close();
}
}
}
}
private void checkSeekBefore(Cell[] cells, HFileScanner scanner, int i)
throws IOException {
assertEquals("Failed to seek to the key before #" + i + " ("
+ CellUtil.getCellKeyAsString(cells[i]) + ")", true,
scanner.seekBefore(cells[i]));
}
private void checkNoSeekBefore(Cell[] cells, HFileScanner scanner, int i)
throws IOException {
assertEquals("Incorrectly succeeded in seeking to before first key ("
+ CellUtil.getCellKeyAsString(cells[i]) + ")", false,
scanner.seekBefore(cells[i]));
}
/** Check a key/value pair after it was read by the reader */
private void checkCell(Cell expected, Cell actual) {
assertTrue(String.format("Expected key %s, but was %s",
CellUtil.getCellKeyAsString(expected), CellUtil.getCellKeyAsString(actual)),
CellUtil.equals(expected, actual));
}
}