HBASE-5864 Error while reading from hfile in 0.94 (Ram)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1331058 13f79535-47bb-0310-9956-ffa450edef68
parent f854df4223
commit 4ecfcbde5d
@@ -1099,10 +1099,9 @@ public class HFileBlock extends SchemaConfigured implements Cacheable {
     /**
      * Similar to {@link #nextBlock()} but checks block type, throws an
-     * exception if incorrect, and returns the data portion of the block as
-     * an input stream.
+     * exception if incorrect, and returns the HFile block
      */
-    DataInputStream nextBlockAsStream(BlockType blockType) throws IOException;
+    HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
   }

   /** A full-fledged reader with iteration ability. */
@@ -1200,14 +1199,14 @@ public class HFileBlock extends SchemaConfigured implements Cacheable {
       }

       @Override
-      public DataInputStream nextBlockAsStream(BlockType blockType)
+      public HFileBlock nextBlockWithBlockType(BlockType blockType)
           throws IOException {
         HFileBlock blk = nextBlock();
         if (blk.getBlockType() != blockType) {
           throw new IOException("Expected block of type " + blockType
               + " but found " + blk.getBlockType());
         }
-        return blk.getByteStream();
+        return blk;
       }
     };
   }
@@ -533,24 +533,43 @@ public class HFileBlockIndex {
         }
       }
     }

+    /**
+     * Read in the root-level index from the given input stream. Must match
+     * what was written into the root level by
+     * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the
+     * offset that function returned.
+     *
+     * @param blk the HFile block
+     * @param numEntries the number of root-level index entries
+     * @return the buffered input stream or wrapped byte input stream
+     * @throws IOException
+     */
+    public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
+      DataInputStream in = blk.getByteStream();
+      readRootIndex(in, numEntries);
+      return in;
+    }
+
     /**
      * Read the root-level metadata of a multi-level block index. Based on
      * {@link #readRootIndex(DataInput, int)}, but also reads metadata
      * necessary to compute the mid-key in a multi-level index.
      *
-     * @param in the buffered or byte input stream to read from
+     * @param blk the HFile block
      * @param numEntries the number of root-level index entries
      * @throws IOException
      */
-    public void readMultiLevelIndexRoot(DataInputStream in,
+    public void readMultiLevelIndexRoot(HFileBlock blk,
         final int numEntries) throws IOException {
-      readRootIndex(in, numEntries);
-      if (in.available() < MID_KEY_METADATA_SIZE) {
+      DataInputStream in = readRootIndex(blk, numEntries);
+      // after reading the root index the checksum bytes have to
+      // be subtracted to know if the mid key exists.
+      int checkSumBytes = blk.totalChecksumBytes();
+      if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
         // No mid-key metadata available.
         return;
       }

       midLeafBlockOffset = in.readLong();
       midLeafBlockOnDiskSize = in.readInt();
       midKeyEntry = in.readInt();
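Note on the hunk above: the byte stream obtained from an HFileBlock still carries the block's trailing checksum bytes, so after readRootIndex() has consumed the root-level entries, in.available() can still be at or above MID_KEY_METADATA_SIZE even when no mid-key metadata was written; the patch therefore subtracts blk.totalChecksumBytes() before the comparison. A minimal standalone sketch of the old versus fixed check follows. It uses plain java.io only; the 16-byte metadata size is taken from the long plus two ints read above, while the 20 trailing checksum bytes are an assumed example value, not something read from a real block.

    import java.io.ByteArrayInputStream;
    import java.io.DataInputStream;
    import java.io.IOException;

    // Illustration only, not HBase code.
    public class MidKeyCheckSketch {
      // Mid-key metadata: mid leaf block offset (long) + on-disk size (int)
      // + entry index (int) = 16 bytes, mirroring the reads in the patch.
      static final int MID_KEY_METADATA_SIZE = 8 + 4 + 4;

      public static void main(String[] args) throws IOException {
        // Pretend this is what remains in the block's byte stream after the
        // root-level entries were consumed: no mid-key metadata was written,
        // but 20 bytes of checksums still trail the index data (assumed value;
        // in HBase this would come from blk.totalChecksumBytes()).
        int checkSumBytes = 20;
        DataInputStream in =
            new DataInputStream(new ByteArrayInputStream(new byte[checkSumBytes]));

        // Old check: 20 >= 16, so the checksum bytes would be misread as
        // mid-key metadata, yielding a bogus mid-leaf-block offset.
        System.out.println("old check sees mid-key: "
            + (in.available() >= MID_KEY_METADATA_SIZE));

        // Fixed check from the patch: subtract the checksum bytes first.
        System.out.println("fixed check sees mid-key: "
            + ((in.available() - checkSumBytes) >= MID_KEY_METADATA_SIZE));
      }
    }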
@@ -763,7 +782,7 @@ public class HFileBlockIndex {

     if (LOG.isTraceEnabled()) {
       LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
-          + out.getPos() + ", " + rootChunk.getNumEntries()
+          + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
           + " root-level entries, " + totalNumEntries + " total entries, "
           + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
           " on-disk size, "
@@ -126,17 +126,17 @@ public class HFileReaderV2 extends AbstractHFileReader {
     // Data index. We also read statistics about the block index written after
     // the root level.
     dataBlockIndexReader.readMultiLevelIndexRoot(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getDataIndexCount());

     // Meta index.
     metaBlockIndexReader.readRootIndex(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getMetaIndexCount());

     // File info
     fileInfo = new FileInfo();
-    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+    fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
     lastKey = fileInfo.get(FileInfo.LASTKEY);
     avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
     avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));
@@ -176,7 +176,7 @@ public class TestHFileBlockIndex {
         Bytes.BYTES_RAWCOMPARATOR, numLevels, brw);

     indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
-        fileSize).nextBlockAsStream(BlockType.ROOT_INDEX), numRootEntries);
+        fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

     long prevOffset = -1;
     int i = 0;
@@ -37,8 +37,7 @@ import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.RawComparator;
@@ -72,14 +71,30 @@ public class TestHFileWriterV2 {
   @Test
   public void testHFileFormatV2() throws IOException {
     Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
-        "testHFileFormatV2");
+        "testHFileFormatV2");
+    final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
+    final int entryCount = 10000;
+    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false);
+  }
+
+  @Test
+  public void testMidKeyInHFile() throws IOException{
+    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
+        "testMidKeyInHFile");
+    Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
+    int entryCount = 50000;
+    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true);
+  }
+
+  private void writeDataAndReadFromHFile(Path hfilePath,
+      Algorithm compressAlgo, int entryCount, boolean findMidKey) throws IOException {

-    final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
     HFileWriterV2 writer = (HFileWriterV2)
         new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf))
             .withPath(fs, hfilePath)
             .withBlockSize(4096)
-            .withCompression(COMPRESS_ALGO)
+            .withCompression(compressAlgo)
             .withComparator(KeyValue.KEY_COMPARATOR)
             .create();
@@ -88,11 +103,10 @@ public class TestHFileWriterV2 {

     Random rand = new Random(9713312); // Just a fixed seed.

-    final int ENTRY_COUNT = 10000;
     List<byte[]> keys = new ArrayList<byte[]>();
     List<byte[]> values = new ArrayList<byte[]>();

-    for (int i = 0; i < ENTRY_COUNT; ++i) {
+    for (int i = 0; i < entryCount; ++i) {
       byte[] keyBytes = randomOrderedKey(rand, i);

       // A random-length random value.
@@ -113,6 +127,7 @@ public class TestHFileWriterV2 {
     writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));

     writer.close();
+

     FSDataInputStream fsdis = fs.open(hfilePath);

@@ -124,10 +139,10 @@ public class TestHFileWriterV2 {
         FixedFileTrailer.readFromStream(fsdis, fileSize);

     assertEquals(2, trailer.getMajorVersion());
-    assertEquals(ENTRY_COUNT, trailer.getEntryCount());
+    assertEquals(entryCount, trailer.getEntryCount());

     HFileBlock.FSReader blockReader =
-        new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize);
+        new HFileBlock.FSReaderV2(fsdis, compressAlgo, fileSize);
     // Comparator class name is stored in the trailer in version 2.
     RawComparator<byte []> comparator = trailer.createComparator();
     HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
@@ -143,16 +158,21 @@ public class TestHFileWriterV2 {
     // Data index. We also read statistics about the block index written after
     // the root level.
     dataBlockIndexReader.readMultiLevelIndexRoot(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getDataIndexCount());

+    if (findMidKey) {
+      byte[] midkey = dataBlockIndexReader.midkey();
+      assertNotNull("Midkey should not be null", midkey);
+    }
+
     // Meta index.
     metaBlockIndexReader.readRootIndex(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(),
         trailer.getMetaIndexCount());
     // File info
     FileInfo fileInfo = new FileInfo();
-    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+    fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
     byte [] keyValueFormatVersion = fileInfo.get(
         HFileWriterV2.KEY_VALUE_VERSION);
     boolean includeMemstoreTS = keyValueFormatVersion != null &&
@@ -200,7 +220,7 @@ public class TestHFileWriterV2 {
     }
     LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
         + blocksRead);
-    assertEquals(ENTRY_COUNT, entriesRead);
+    assertEquals(entryCount, entriesRead);

     // Meta blocks. We can scan until the load-on-open data offset (which is
     // the root block index offset in version 2) because we are not testing
@@ -226,6 +246,7 @@ public class TestHFileWriterV2 {
     fsdis.close();
   }
+

   // Static stuff used by various HFile v2 unit tests

   private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";