HBASE-6114. CacheControl flags should be tunable per table schema per CF

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1344105 13f79535-47bb-0310-9956-ffa450edef68
Author: Andrew Kyle Purtell
Date:   2012-05-30 05:26:15 +00:00
Parent: cf91e55fb5
Commit: 9bb6236ecc
4 changed files with 392 additions and 7 deletions
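Summary: HColumnDescriptor gains four per-column-family attributes (CACHE_DATA_ON_WRITE, CACHE_INDEX_ON_WRITE, CACHE_BLOOMS_ON_WRITE, and EVICT_BLOCKS_ON_CLOSE) with defaults, getters, and chainable setters. The CacheConfig constructor now ORs each family-level flag with the corresponding global configuration key, HFileBlock.getOnDiskSizeWithHeader() is made public for use by tests, and a new parameterized test, TestCacheOnWriteInSchema, verifies that data, index, and Bloom filter blocks are cached on write when the matching schema flag is set.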

File: org/apache/hadoop/hbase/HColumnDescriptor.java

@@ -77,7 +77,11 @@ public class HColumnDescriptor implements WritableComparable<HColumnDescriptor>
public static final String DATA_BLOCK_ENCODING =
"DATA_BLOCK_ENCODING";
public static final String BLOCKCACHE = "BLOCKCACHE";
public static final String CACHE_DATA_ON_WRITE = "CACHE_DATA_ON_WRITE";
public static final String CACHE_INDEX_ON_WRITE = "CACHE_INDEX_ON_WRITE";
public static final String CACHE_BLOOMS_ON_WRITE = "CACHE_BLOOMS_ON_WRITE";
public static final String EVICT_BLOCKS_ON_CLOSE = "EVICT_BLOCKS_ON_CLOSE";
/**
* Size of storefile/hfile 'blocks'. Default is {@link #DEFAULT_BLOCKSIZE}.
* Use smaller block sizes for faster random-access at expense of larger
@@ -141,6 +145,18 @@ public class HColumnDescriptor implements WritableComparable<HColumnDescriptor>
*/
public static final boolean DEFAULT_BLOCKCACHE = true;
/**
* Default setting for whether to cache data blocks on write if block caching
* is enabled.
*/
public static final boolean DEFAULT_CACHE_DATA_ON_WRITE = false;
/**
* Default setting for whether to cache index blocks on write if block
* caching is enabled.
*/
public static final boolean DEFAULT_CACHE_INDEX_ON_WRITE = false;
/**
* Default size of blocks in files stored to the filesystem (hfiles).
*/
@@ -151,6 +167,12 @@ public class HColumnDescriptor implements WritableComparable<HColumnDescriptor>
*/
public static final String DEFAULT_BLOOMFILTER = StoreFile.BloomType.NONE.toString();
/**
* Default setting for whether to cache bloom filter blocks on write if block
* caching is enabled.
*/
public static final boolean DEFAULT_CACHE_BLOOMS_ON_WRITE = false;
/**
* Default time to live of cell contents.
*/
@@ -161,6 +183,12 @@ public class HColumnDescriptor implements WritableComparable<HColumnDescriptor>
*/
public static final int DEFAULT_REPLICATION_SCOPE = HConstants.REPLICATION_SCOPE_LOCAL;
/**
* Default setting for whether to evict cached blocks from the blockcache on
* close.
*/
public static final boolean DEFAULT_EVICT_BLOCKS_ON_CLOSE = false;
private final static Map<String, String> DEFAULT_VALUES
= new HashMap<String, String>();
private final static Set<ImmutableBytesWritable> RESERVED_KEYWORDS
@@ -178,6 +206,10 @@ public class HColumnDescriptor implements WritableComparable<HColumnDescriptor>
DEFAULT_VALUES.put(KEEP_DELETED_CELLS, String.valueOf(DEFAULT_KEEP_DELETED));
DEFAULT_VALUES.put(ENCODE_ON_DISK, String.valueOf(DEFAULT_ENCODE_ON_DISK));
DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
DEFAULT_VALUES.put(CACHE_DATA_ON_WRITE, String.valueOf(DEFAULT_CACHE_DATA_ON_WRITE));
DEFAULT_VALUES.put(CACHE_INDEX_ON_WRITE, String.valueOf(DEFAULT_CACHE_INDEX_ON_WRITE));
DEFAULT_VALUES.put(CACHE_BLOOMS_ON_WRITE, String.valueOf(DEFAULT_CACHE_BLOOMS_ON_WRITE));
DEFAULT_VALUES.put(EVICT_BLOCKS_ON_CLOSE, String.valueOf(DEFAULT_EVICT_BLOCKS_ON_CLOSE));
for (String s : DEFAULT_VALUES.keySet()) {
RESERVED_KEYWORDS.add(new ImmutableBytesWritable(Bytes.toBytes(s)));
}
@@ -778,6 +810,84 @@ public class HColumnDescriptor implements WritableComparable<HColumnDescriptor>
return setValue(REPLICATION_SCOPE, Integer.toString(scope));
}
/**
* @return true if we should cache data blocks on write
*/
public boolean shouldCacheDataOnWrite() {
String value = getValue(CACHE_DATA_ON_WRITE);
if (value != null) {
return Boolean.valueOf(value).booleanValue();
}
return DEFAULT_CACHE_DATA_ON_WRITE;
}
/**
* @param value true if we should cache data blocks on write
* @return this (for chained invocation)
*/
public HColumnDescriptor setCacheDataOnWrite(boolean value) {
return setValue(CACHE_DATA_ON_WRITE, Boolean.toString(value));
}
/**
* @return true if we should cache index blocks on write
*/
public boolean shouldCacheIndexesOnWrite() {
String value = getValue(CACHE_INDEX_ON_WRITE);
if (value != null) {
return Boolean.valueOf(value).booleanValue();
}
return DEFAULT_CACHE_INDEX_ON_WRITE;
}
/**
* @param value true if we should cache index blocks on write
* @return this (for chained invocation)
*/
public HColumnDescriptor setCacheIndexesOnWrite(boolean value) {
return setValue(CACHE_INDEX_ON_WRITE, Boolean.toString(value));
}
/**
* @return true if we should cache bloomfilter blocks on write
*/
public boolean shouldCacheBloomsOnWrite() {
String value = getValue(CACHE_BLOOMS_ON_WRITE);
if (value != null) {
return Boolean.valueOf(value).booleanValue();
}
return DEFAULT_CACHE_BLOOMS_ON_WRITE;
}
/**
* @param value true if we should cache bloomfilter blocks on write
* @return this (for chained invocation)
*/
public HColumnDescriptor setCacheBloomsOnWrite(boolean value) {
return setValue(CACHE_BLOOMS_ON_WRITE, Boolean.toString(value));
}
/**
* @return true if we should evict cached blocks from the blockcache on
* close
*/
public boolean shouldEvictBlocksOnClose() {
String value = getValue(EVICT_BLOCKS_ON_CLOSE);
if (value != null) {
return Boolean.valueOf(value).booleanValue();
}
return DEFAULT_EVICT_BLOCKS_ON_CLOSE;
}
/**
* @param value true if we should evict cached blocks from the blockcache on
* close
* @return this (for chained invocation)
*/
public HColumnDescriptor setEvictBlocksOnClose(boolean value) {
return setValue(EVICT_BLOCKS_ON_CLOSE, Boolean.toString(value));
}
/**
* @see java.lang.Object#toString()
*/
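
As a usage sketch (not part of this commit), the new chainable setters can be applied when declaring a column family. The class, table, and family names below are illustrative:

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical example of the setters added in this hunk. Each setter
// returns this, so the calls can be chained.
public class CacheOnWriteSchemaExample {
  public static HTableDescriptor exampleTable() {
    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes("cf"));
    hcd.setCacheDataOnWrite(true)     // cache data blocks as they are written
       .setCacheIndexesOnWrite(true)  // cache non-root index blocks on write
       .setCacheBloomsOnWrite(true)   // cache Bloom filter blocks on write
       .setEvictBlocksOnClose(true);  // evict this family's blocks on close
    HTableDescriptor htd = new HTableDescriptor("example_table");
    htd.addFamily(hcd);
    return htd;
  }
}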

File: org/apache/hadoop/hbase/io/hfile/CacheConfig.java

@@ -117,13 +117,18 @@ public class CacheConfig {
    */
   public CacheConfig(Configuration conf, HColumnDescriptor family) {
     this(CacheConfig.instantiateBlockCache(conf),
-        family.isBlockCacheEnabled(), family.isInMemory(),
-        conf.getBoolean(CACHE_BLOCKS_ON_WRITE_KEY, DEFAULT_CACHE_DATA_ON_WRITE),
+        family.isBlockCacheEnabled(),
+        family.isInMemory(),
+        // For the following flags we enable them regardless of per-schema settings
+        // if they are enabled in the global configuration.
+        conf.getBoolean(CACHE_BLOCKS_ON_WRITE_KEY,
+            DEFAULT_CACHE_DATA_ON_WRITE) || family.shouldCacheDataOnWrite(),
         conf.getBoolean(CACHE_INDEX_BLOCKS_ON_WRITE_KEY,
-            DEFAULT_CACHE_INDEXES_ON_WRITE),
+            DEFAULT_CACHE_INDEXES_ON_WRITE) || family.shouldCacheIndexesOnWrite(),
         conf.getBoolean(CACHE_BLOOM_BLOCKS_ON_WRITE_KEY,
-            DEFAULT_CACHE_BLOOMS_ON_WRITE),
-        conf.getBoolean(EVICT_BLOCKS_ON_CLOSE_KEY, DEFAULT_EVICT_ON_CLOSE),
+            DEFAULT_CACHE_BLOOMS_ON_WRITE) || family.shouldCacheBloomsOnWrite(),
+        conf.getBoolean(EVICT_BLOCKS_ON_CLOSE_KEY,
+            DEFAULT_EVICT_ON_CLOSE) || family.shouldEvictBlocksOnClose(),
         conf.getBoolean(CACHE_DATA_BLOCKS_COMPRESSED_KEY, DEFAULT_COMPRESSED_CACHE)
     );
   }
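
Net effect: each write-time caching flag becomes the logical OR of the global configuration key and the per-family schema flag, so either side can switch the behavior on, but neither can veto the other. A minimal sketch, assuming a Configuration conf and an HColumnDescriptor family in scope:

// Effective decision for data blocks; the index, Bloom, and eviction
// flags follow the same pattern (sketch, not the committed code).
boolean global = conf.getBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
boolean cacheDataOnWrite = global || family.shouldCacheDataOnWrite();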

File: org/apache/hadoop/hbase/io/hfile/HFileBlock.java

@@ -288,7 +288,7 @@ public class HFileBlock extends SchemaConfigured implements Cacheable {
    * @return the on-disk size of the block with header size included. This
    * includes the header, the data and the checksum data.
    */
-  int getOnDiskSizeWithHeader() {
+  public int getOnDiskSizeWithHeader() {
     return onDiskSizeWithoutHeader + headerSize();
   }
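
getOnDiskSizeWithHeader() is widened from package-private to public so the new test below can walk a store file block by block, advancing its read offset by the returned size after each readBlock() call.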

File: org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java (new file)

@@ -0,0 +1,270 @@
/*
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import static org.junit.Assert.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileReaderV2;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.TestHFileWriterV2;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/**
* Tests {@link HFile} cache-on-write functionality for data blocks, non-root
* index blocks, and Bloom filter blocks, as specified by the column family.
*/
@RunWith(Parameterized.class)
@Category(MediumTests.class)
public class TestCacheOnWriteInSchema {
private static final Log LOG = LogFactory.getLog(TestCacheOnWriteInSchema.class);
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static final String DIR = TEST_UTIL.getDataTestDir("TestCacheOnWriteInSchema").toString();
private static final byte [] table = Bytes.toBytes("table");
private static byte [] family = Bytes.toBytes("family");
private static final int NUM_KV = 25000;
private static final Random rand = new Random(12983177L);
/** The number of valid key types possible in a store file */
private static final int NUM_VALID_KEY_TYPES =
KeyValue.Type.values().length - 2;
private static enum CacheOnWriteType {
DATA_BLOCKS(BlockType.DATA, BlockType.ENCODED_DATA),
BLOOM_BLOCKS(BlockType.BLOOM_CHUNK),
INDEX_BLOCKS(BlockType.LEAF_INDEX, BlockType.INTERMEDIATE_INDEX);
private final BlockType blockType1;
private final BlockType blockType2;
private CacheOnWriteType(BlockType blockType) {
this(blockType, blockType);
}
private CacheOnWriteType(BlockType blockType1, BlockType blockType2) {
this.blockType1 = blockType1;
this.blockType2 = blockType2;
}
public boolean shouldBeCached(BlockType blockType) {
return blockType == blockType1 || blockType == blockType2;
}
public void modifyFamilySchema(HColumnDescriptor family) {
switch (this) {
case DATA_BLOCKS:
family.setCacheDataOnWrite(true);
break;
case BLOOM_BLOCKS:
family.setCacheBloomsOnWrite(true);
break;
case INDEX_BLOCKS:
family.setCacheIndexesOnWrite(true);
break;
}
}
}
private final CacheOnWriteType cowType;
private Configuration conf;
private final String testDescription;
private Store store;
private FileSystem fs;
public TestCacheOnWriteInSchema(CacheOnWriteType cowType) {
this.cowType = cowType;
testDescription = "[cacheOnWrite=" + cowType + "]";
System.out.println(testDescription);
}
@Parameters
public static Collection<Object[]> getParameters() {
List<Object[]> cowTypes = new ArrayList<Object[]>();
for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
cowTypes.add(new Object[] { cowType });
}
return cowTypes;
}
@Before
public void setUp() throws IOException {
conf = TEST_UTIL.getConfiguration();
conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);
conf.setBoolean(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY, false);
fs = HFileSystem.get(conf);
// Create the schema
HColumnDescriptor hcd = new HColumnDescriptor(family);
hcd.setBloomFilterType(BloomType.ROWCOL);
cowType.modifyFamilySchema(hcd);
HTableDescriptor htd = new HTableDescriptor(table);
htd.addFamily(hcd);
// Create a store based on the schema
Path basedir = new Path(DIR);
Path logdir = new Path(DIR+"/logs");
Path oldLogDir = new Path(basedir, HConstants.HREGION_OLDLOGDIR_NAME);
fs.delete(logdir, true);
HRegionInfo info = new HRegionInfo(htd.getName(), null, null, false);
HLog hlog = new HLog(fs, logdir, oldLogDir, conf);
HRegion region = new HRegion(basedir, hlog, fs, conf, info, htd, null);
store = new Store(basedir, region, hcd, fs, conf);
}
@After
public void tearDown() {
try {
fs.delete(new Path(DIR), true);
} catch (IOException e) {
LOG.error("Could not delete " + DIR, e);
}
}
@Test
public void testCacheOnWriteInSchema() throws IOException {
// Write some random data into the store
StoreFile.Writer writer = store.createWriterInTmp(Integer.MAX_VALUE,
HFile.DEFAULT_COMPRESSION_ALGORITHM, false);
writeStoreFile(writer);
writer.close();
// Verify the block types of interest were cached on write
readStoreFile(writer.getPath());
}
private void readStoreFile(Path path) throws IOException {
CacheConfig cacheConf = store.getCacheConfig();
BlockCache cache = cacheConf.getBlockCache();
StoreFile sf = new StoreFile(fs, path, conf, cacheConf,
BloomType.ROWCOL, null);
store.passSchemaMetricsTo(sf);
HFileReaderV2 reader = (HFileReaderV2) sf.createReader().getHFileReader();
try {
// Open a scanner with (on read) caching disabled
HFileScanner scanner = reader.getScanner(false, false);
assertTrue(testDescription, scanner.seekTo());
// Cribbed from io.hfile.TestCacheOnWrite
long offset = 0;
HFileBlock prevBlock = null;
while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
long onDiskSize = -1;
if (prevBlock != null) {
onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader();
}
// Flags: don't cache the block, use pread, this is not a compaction.
// Also, pass null for expected block type to avoid checking it.
HFileBlock block = reader.readBlock(offset, onDiskSize, false, true,
false, null);
BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(),
offset);
boolean isCached = cache.getBlock(blockCacheKey, true) != null;
boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
if (shouldBeCached != isCached) {
throw new AssertionError(
"shouldBeCached: " + shouldBeCached+ "\n" +
"isCached: " + isCached + "\n" +
"Test description: " + testDescription + "\n" +
"block: " + block + "\n" +
"blockCacheKey: " + blockCacheKey);
}
prevBlock = block;
offset += block.getOnDiskSizeWithHeader();
}
} finally {
reader.close();
}
}
private static KeyValue.Type generateKeyType(Random rand) {
if (rand.nextBoolean()) {
// Let's make half of KVs puts.
return KeyValue.Type.Put;
} else {
KeyValue.Type keyType =
KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum)
{
throw new RuntimeException("Generated an invalid key type: " + keyType
+ ". " + "Probably the layout of KeyValue.Type has changed.");
}
return keyType;
}
}
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
final int rowLen = 32;
for (int i = 0; i < NUM_KV; ++i) {
byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
byte[] v = TestHFileWriterV2.randomValue(rand);
int cfLen = rand.nextInt(k.length - rowLen + 1);
KeyValue kv = new KeyValue(
k, 0, rowLen,
k, rowLen, cfLen,
k, rowLen + cfLen, k.length - rowLen - cfLen,
rand.nextLong(),
generateKeyType(rand),
v, 0, v.length);
writer.append(kv);
}
}
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}
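
Assuming the standard HBase Maven build, the new test can be run on its own with something like mvn test -Dtest=TestCacheOnWriteInSchema.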