HBASE-5864 Error while reading from hfile in 0.94 (Ram)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1331058 13f79535-47bb-0310-9956-ffa450edef68
larsh 2012-04-26 20:08:23 +00:00
parent f854df4223
commit 4ecfcbde5d
5 changed files with 68 additions and 29 deletions
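For context: HBase 0.94 appends per-block checksums to each HFile block, and the stream handed out for a ROOT_INDEX block still contains those trailing checksum bytes. A reader that only sees a DataInputStream therefore cannot tell whether the bytes left after the root-level entries are mid-key metadata or just checksums. This patch passes the whole HFileBlock to the index readers instead. A condensed before/after view of the caller side (taken from the HFileReaderV2 hunk below):

    // Before: only the raw stream was visible to the index reader.
    dataBlockIndexReader.readMultiLevelIndexRoot(
        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
        trailer.getDataIndexCount());

    // After: the block itself is passed, so totalChecksumBytes() can be consulted.
    dataBlockIndexReader.readMultiLevelIndexRoot(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
        trailer.getDataIndexCount());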


@@ -1099,10 +1099,9 @@ public class HFileBlock extends SchemaConfigured implements Cacheable {
/**
* Similar to {@link #nextBlock()} but checks block type, throws an
- * exception if incorrect, and returns the data portion of the block as
- * an input stream.
+ * exception if incorrect, and returns the HFile block
*/
- DataInputStream nextBlockAsStream(BlockType blockType) throws IOException;
+ HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
}
/** A full-fledged reader with iteration ability. */
@@ -1200,14 +1199,14 @@ public class HFileBlock extends SchemaConfigured implements Cacheable {
}
@Override
- public DataInputStream nextBlockAsStream(BlockType blockType)
+ public HFileBlock nextBlockWithBlockType(BlockType blockType)
throws IOException {
HFileBlock blk = nextBlock();
if (blk.getBlockType() != blockType) {
throw new IOException("Expected block of type " + blockType
+ " but found " + blk.getBlockType());
}
- return blk.getByteStream();
+ return blk;
}
};
}


@@ -533,24 +533,43 @@ public class HFileBlockIndex {
}
}
}
+ /**
+ * Read in the root-level index from the given input stream. Must match
+ * what was written into the root level by
+ * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the
+ * offset that function returned.
+ *
+ * @param blk the HFile block
+ * @param numEntries the number of root-level index entries
+ * @return the buffered input stream or wrapped byte input stream
+ * @throws IOException
+ */
+ public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
+ DataInputStream in = blk.getByteStream();
+ readRootIndex(in, numEntries);
+ return in;
+ }
/**
* Read the root-level metadata of a multi-level block index. Based on
* {@link #readRootIndex(DataInput, int)}, but also reads metadata
* necessary to compute the mid-key in a multi-level index.
*
- * @param in the buffered or byte input stream to read from
+ * @param blk the HFile block
* @param numEntries the number of root-level index entries
* @throws IOException
*/
- public void readMultiLevelIndexRoot(DataInputStream in,
+ public void readMultiLevelIndexRoot(HFileBlock blk,
final int numEntries) throws IOException {
- readRootIndex(in, numEntries);
- if (in.available() < MID_KEY_METADATA_SIZE) {
+ DataInputStream in = readRootIndex(blk, numEntries);
+ // after reading the root index the checksum bytes have to
+ // be subtracted to know if the mid key exists.
+ int checkSumBytes = blk.totalChecksumBytes();
+ if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
// No mid-key metadata available.
return;
}
midLeafBlockOffset = in.readLong();
midLeafBlockOnDiskSize = in.readInt();
midKeyEntry = in.readInt();
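To make the new check concrete, here is the same predicate as a self-contained sketch (illustrative only; the class and method names are made up and are not part of the patch). The mid-key metadata is 16 bytes, matching the readLong()/readInt()/readInt() calls above, and the checksum bytes at the end of the block must not be counted as index payload:

    package org.apache.hadoop.hbase.io.hfile; // assumed, so totalChecksumBytes() is reachable

    import java.io.DataInputStream;
    import java.io.IOException;

    final class MidKeyMetadataCheck {
      // long mid-leaf block offset + int on-disk size + int entry index = 16 bytes
      private static final int MID_KEY_METADATA_SIZE = 8 + 4 + 4;

      // 'in' is the root index block's byte stream, positioned just past the
      // root-level entries; its remaining bytes still end with the checksum bytes.
      static boolean hasMidKeyMetadata(HFileBlock rootIndexBlock, DataInputStream in)
          throws IOException {
        return in.available() - rootIndexBlock.totalChecksumBytes() >= MID_KEY_METADATA_SIZE;
      }
    }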
@@ -763,7 +782,7 @@ public class HFileBlockIndex {
if (LOG.isTraceEnabled()) {
LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
- + out.getPos() + ", " + rootChunk.getNumEntries()
+ + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
+ " root-level entries, " + totalNumEntries + " total entries, "
+ StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
" on-disk size, "


@@ -126,17 +126,17 @@ public class HFileReaderV2 extends AbstractHFileReader {
// Data index. We also read statistics about the block index written after
// the root level.
dataBlockIndexReader.readMultiLevelIndexRoot(
- blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+ blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
trailer.getDataIndexCount());
// Meta index.
metaBlockIndexReader.readRootIndex(
- blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+ blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
trailer.getMetaIndexCount());
// File info
fileInfo = new FileInfo();
- fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+ fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
lastKey = fileInfo.get(FileInfo.LASTKEY);
avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));
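Putting the hunk back together, the post-patch load-on-open sequence in HFileReaderV2 reads roughly as follows (a sketch; blockIter and trailer are set up in the unchanged surrounding code). Only the index readers need the block itself; FileInfo still deserializes from the data stream:

    // Data index root (mid-key metadata handling is now checksum-aware).
    dataBlockIndexReader.readMultiLevelIndexRoot(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
        trailer.getDataIndexCount());

    // Meta index root.
    metaBlockIndexReader.readRootIndex(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
        trailer.getMetaIndexCount());

    // File info: the byte stream is taken explicitly from the returned block.
    fileInfo = new FileInfo();
    fileInfo.readFields(
        blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());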


@@ -176,7 +176,7 @@ public class TestHFileBlockIndex {
Bytes.BYTES_RAWCOMPARATOR, numLevels, brw);
indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
- fileSize).nextBlockAsStream(BlockType.ROOT_INDEX), numRootEntries);
+ fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);
long prevOffset = -1;
int i = 0;


@@ -37,8 +37,7 @@ import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
- import org.apache.hadoop.hbase.HBaseTestingUtility;
- import org.apache.hadoop.hbase.KeyValue;
+ import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.RawComparator;
@@ -72,14 +71,30 @@ public class TestHFileWriterV2 {
@Test
public void testHFileFormatV2() throws IOException {
Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
- "testHFileFormatV2");
+ "testHFileFormatV2");
+ final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
+ final int entryCount = 10000;
+ writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false);
+ }
+ @Test
+ public void testMidKeyInHFile() throws IOException{
+ Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
+ "testMidKeyInHFile");
+ Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
+ int entryCount = 50000;
+ writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true);
+ }
+ private void writeDataAndReadFromHFile(Path hfilePath,
+ Algorithm compressAlgo, int entryCount, boolean findMidKey) throws IOException {
- final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
HFileWriterV2 writer = (HFileWriterV2)
new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf))
.withPath(fs, hfilePath)
.withBlockSize(4096)
- .withCompression(COMPRESS_ALGO)
+ .withCompression(compressAlgo)
.withComparator(KeyValue.KEY_COMPARATOR)
.create();
@@ -88,11 +103,10 @@ public class TestHFileWriterV2 {
Random rand = new Random(9713312); // Just a fixed seed.
- final int ENTRY_COUNT = 10000;
List<byte[]> keys = new ArrayList<byte[]>();
List<byte[]> values = new ArrayList<byte[]>();
- for (int i = 0; i < ENTRY_COUNT; ++i) {
+ for (int i = 0; i < entryCount; ++i) {
byte[] keyBytes = randomOrderedKey(rand, i);
// A random-length random value.
@@ -113,6 +127,7 @@ public class TestHFileWriterV2 {
writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
writer.close();
FSDataInputStream fsdis = fs.open(hfilePath);
@@ -124,10 +139,10 @@ public class TestHFileWriterV2 {
FixedFileTrailer.readFromStream(fsdis, fileSize);
assertEquals(2, trailer.getMajorVersion());
- assertEquals(ENTRY_COUNT, trailer.getEntryCount());
+ assertEquals(entryCount, trailer.getEntryCount());
HFileBlock.FSReader blockReader =
- new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize);
+ new HFileBlock.FSReaderV2(fsdis, compressAlgo, fileSize);
// Comparator class name is stored in the trailer in version 2.
RawComparator<byte []> comparator = trailer.createComparator();
HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
@@ -143,16 +158,21 @@ public class TestHFileWriterV2 {
// Data index. We also read statistics about the block index written after
// the root level.
dataBlockIndexReader.readMultiLevelIndexRoot(
- blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+ blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
trailer.getDataIndexCount());
+ if (findMidKey) {
+ byte[] midkey = dataBlockIndexReader.midkey();
+ assertNotNull("Midkey should not be null", midkey);
+ }
// Meta index.
metaBlockIndexReader.readRootIndex(
- blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+ blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(),
trailer.getMetaIndexCount());
// File info
FileInfo fileInfo = new FileInfo();
- fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+ fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
byte [] keyValueFormatVersion = fileInfo.get(
HFileWriterV2.KEY_VALUE_VERSION);
boolean includeMemstoreTS = keyValueFormatVersion != null &&
@@ -200,7 +220,7 @@ public class TestHFileWriterV2 {
}
LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
+ blocksRead);
- assertEquals(ENTRY_COUNT, entriesRead);
+ assertEquals(entryCount, entriesRead);
// Meta blocks. We can scan until the load-on-open data offset (which is
// the root block index offset in version 2) because we are not testing
@@ -226,6 +246,7 @@ public class TestHFileWriterV2 {
fsdis.close();
}
// Static stuff used by various HFile v2 unit tests
private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";