HBASE-17151 New API to create HFile.Reader without instantiating block cache (Vladimir Rodionov)

Enis Soztutar 2016-11-30 17:04:31 -08:00
parent ad857d1b77
commit b6f5d5b85f
3 changed files with 130 additions and 19 deletions
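In short: opening an HFile.Reader previously required passing a CacheConfig, whose construction could instantiate a block cache even for one-off file reads. This commit adds a CacheConfig.DISABLED constant and a createReader(fs, path, conf) overload that uses it. A minimal usage sketch of the new API (the class name and path argument are illustrative, and getEntries() is assumed from the existing Reader interface, not part of this patch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.hfile.HFile;

public class ReadHFileWithoutCache {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(args[0]); // an existing HFile

    // New overload from this commit: no CacheConfig parameter, so the
    // reader is opened with CacheConfig.DISABLED and no block cache is
    // instantiated.
    HFile.Reader reader = HFile.createReader(fs, path, conf);
    try {
      System.out.println("entries=" + reader.getEntries());
    } finally {
      reader.close();
    }
  }
}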

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java

@@ -25,10 +25,10 @@ import java.lang.management.ManagementFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.util.ReflectionUtils;
@@ -43,6 +43,14 @@ import com.google.common.annotations.VisibleForTesting;
public class CacheConfig {
private static final Log LOG = LogFactory.getLog(CacheConfig.class.getName());
/**
* Disabled cache configuration
*/
public static final CacheConfig DISABLED = new CacheConfig();
/**
* Configuration key to cache data blocks on read. Bloom blocks and index blocks are always
* cached if the block cache is enabled.
@@ -320,6 +328,11 @@ public class CacheConfig {
cacheConf.cacheDataInL1, cacheConf.dropBehindCompaction);
}
private CacheConfig() {
this(null, false, false, false, false, false,
false, false, false, false, false);
}
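For reference, the all-false chain above means the DISABLED singleton carries a null BlockCache and answers false to every caching question. A small sketch of the observable behavior (isBlockCacheEnabled() appears in this diff; shouldCacheDataOnRead() is assumed from CacheConfig's existing accessors):

CacheConfig cc = CacheConfig.DISABLED;
assert !cc.isBlockCacheEnabled();    // no BlockCache instance behind it
assert !cc.shouldCacheDataOnRead();  // data blocks are never cached on read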
/**
* Checks whether the block cache is enabled.
*/

hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java

@@ -545,6 +545,19 @@ public class HFile {
return pickReaderVersion(path, fsdis, size, cacheConf, hfs, conf);
}
/**
* Creates a reader with cache configuration disabled.
* @param fs filesystem
* @param path Path to file to read
* @param conf configuration
* @return an active Reader instance
* @throws IOException Will throw a CorruptHFileException
* (DoNotRetryIOException subtype) if hfile is corrupt/invalid.
*/
public static Reader createReader(
FileSystem fs, Path path, Configuration conf) throws IOException {
return createReader(fs, path, CacheConfig.DISABLED, conf);
}
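Because the new overload simply forwards to the existing four-argument createReader with CacheConfig.DISABLED, these two call sites are equivalent (variable names illustrative):

HFile.Reader r1 = HFile.createReader(fs, path, conf);
HFile.Reader r2 = HFile.createReader(fs, path, CacheConfig.DISABLED, conf);
// Neither call instantiates a block cache.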
/**
*
* @param fs filesystem
@@ -655,82 +668,102 @@
return this;
}
@Override
public void clear() {
this.map.clear();
}
@Override
public Comparator<? super byte[]> comparator() {
return map.comparator();
}
@Override
public boolean containsKey(Object key) {
return map.containsKey(key);
}
@Override
public boolean containsValue(Object value) {
return map.containsValue(value);
}
@Override
public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
return map.entrySet();
}
@Override
public boolean equals(Object o) {
return map.equals(o);
}
@Override
public byte[] firstKey() {
return map.firstKey();
}
@Override
public byte[] get(Object key) {
return map.get(key);
}
@Override
public int hashCode() {
return map.hashCode();
}
@Override
public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
return this.map.headMap(toKey);
}
@Override
public boolean isEmpty() {
return map.isEmpty();
}
@Override
public Set<byte[]> keySet() {
return map.keySet();
}
@Override
public byte[] lastKey() {
return map.lastKey();
}
@Override
public byte[] put(byte[] key, byte[] value) {
return this.map.put(key, value);
}
@Override
public void putAll(Map<? extends byte[], ? extends byte[]> m) {
this.map.putAll(m);
}
@Override
public byte[] remove(Object key) {
return this.map.remove(key);
}
@Override
public int size() {
return map.size();
}
@Override
public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
return this.map.subMap(fromKey, toKey);
}
@Override
public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
return this.map.tailMap(fromKey);
}
@Override
public Collection<byte[]> values() {
return map.values();
}

hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java

@@ -18,12 +18,18 @@
*/
package org.apache.hadoop.hbase.io.hfile;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Map;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -33,6 +39,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
@@ -42,21 +49,21 @@ import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
import org.junit.*;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import static org.junit.Assert.*;
/**
* Test HFile features.
*/
@@ -66,14 +73,13 @@ public class TestHFile {
@Rule public TestName testName = new TestName();
private static final Log LOG = LogFactory.getLog(TestHFile.class);
private static final int NUM_VALID_KEY_TYPES = KeyValue.Type.values().length - 2;
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static String ROOT_DIR =
TEST_UTIL.getDataTestDir("TestHFile").toString();
private final int minBlockSize = 512;
private static String localFormatter = "%010d";
private static CacheConfig cacheConf = null;
private Map<String, Long> startingMetrics;
private static Configuration conf;
private static FileSystem fs;
@@ -83,6 +89,65 @@ public class TestHFile {
fs = TEST_UTIL.getTestFileSystem();
}
@Test
public void testReaderWithoutBlockCache() throws Exception {
Path path = writeStoreFile();
try {
readStoreFile(path);
} catch (Exception e) {
// Any exception while reading should fail the test.
fail(e.getMessage());
}
}
private void readStoreFile(Path storeFilePath) throws Exception {
// Open the file reader with block cache disabled.
HFile.Reader reader = HFile.createReader(fs, storeFilePath, conf);
long offset = 0;
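// Sweep every block that precedes the load-on-open section (whose start
// is given by the trailer's getLoadOnOpenDataOffset()), reading each with
// cacheBlock=false and pread=true so nothing touches a block cache.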
while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
HFileBlock block = reader.readBlock(offset, -1, false, true, false, true, null, null);
offset += block.getOnDiskSizeWithHeader();
}
}
private Path writeStoreFile() throws IOException {
Path storeFileParentDir = new Path(TEST_UTIL.getDataTestDir(), "TestHFile");
HFileContext meta = new HFileContextBuilder().withBlockSize(64 * 1024).build();
StoreFileWriter sfw =
new StoreFileWriter.Builder(conf, cacheConf, fs).withOutputDir(storeFileParentDir)
.withComparator(CellComparator.COMPARATOR).withFileContext(meta).build();
final int rowLen = 32;
Random RNG = new Random();
for (int i = 0; i < 1000; ++i) {
byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i);
byte[] v = RandomKeyValueUtil.randomValue(RNG);
int cfLen = RNG.nextInt(k.length - rowLen + 1);
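// Slice the random key into row (rowLen bytes), family (cfLen bytes),
// and qualifier (the remainder) for the KeyValue below.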
KeyValue kv =
new KeyValue(k, 0, rowLen, k, rowLen, cfLen, k, rowLen + cfLen,
k.length - rowLen - cfLen, RNG.nextLong(), generateKeyType(RNG), v, 0, v.length);
sfw.append(kv);
}
sfw.close();
return sfw.getPath();
}
public static KeyValue.Type generateKeyType(Random rand) {
if (rand.nextBoolean()) {
// Let's make half of KVs puts.
return KeyValue.Type.Put;
} else {
KeyValue.Type keyType = KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum) {
throw new RuntimeException("Generated an invalid key type: " + keyType + ". "
+ "Probably the layout of KeyValue.Type has changed.");
}
return keyType;
}
}
/**
* Test empty HFile.
* Test all features work reasonably when hfile is empty of entries.
@@ -450,7 +515,7 @@ public class TestHFile {
mid = HFileWriterImpl.getMidpoint(CellComparator.COMPARATOR, left, right);
assertTrue(CellComparator.COMPARATOR.compareKeyIgnoresMvcc(left, mid) < 0);
assertTrue(CellComparator.COMPARATOR.compareKeyIgnoresMvcc(mid, right) < 0);
assertEquals(1, (int) mid.getRowLength());
assertEquals(1, mid.getRowLength());
left = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
right = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("a"));
@@ -463,21 +528,21 @@ public class TestHFile {
mid = HFileWriterImpl.getMidpoint(CellComparator.COMPARATOR, left, right);
assertTrue(CellComparator.COMPARATOR.compareKeyIgnoresMvcc(left, mid) < 0);
assertTrue(CellComparator.COMPARATOR.compareKeyIgnoresMvcc(mid, right) < 0);
assertEquals(2, (int) mid.getFamilyLength());
assertEquals(2, mid.getFamilyLength());
left = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
right = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("aaaaaaaaa"));
mid = HFileWriterImpl.getMidpoint(CellComparator.COMPARATOR, left, right);
assertTrue(CellComparator.COMPARATOR.compareKeyIgnoresMvcc(left, mid) < 0);
assertTrue(CellComparator.COMPARATOR.compareKeyIgnoresMvcc(mid, right) < 0);
assertEquals(2, (int) mid.getQualifierLength());
assertEquals(2, mid.getQualifierLength());
left = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a"));
right = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("b"));
mid = HFileWriterImpl.getMidpoint(CellComparator.COMPARATOR, left, right);
assertTrue(CellComparator.COMPARATOR.compareKeyIgnoresMvcc(left, mid) < 0);
assertTrue(CellComparator.COMPARATOR.compareKeyIgnoresMvcc(mid, right) <= 0);
assertEquals(1, (int) mid.getQualifierLength());
assertEquals(1, mid.getQualifierLength());
// Assert that if meta comparator, it returns the right cell -- i.e. no
// optimization done.