From 7e095fa615f62777fad8ca84c4b56b2f1baf3cc6 Mon Sep 17 00:00:00 2001 From: Jean-Daniel Cryans Date: Thu, 15 Dec 2011 18:26:35 +0000 Subject: [PATCH] HBASE-4683 Always cache index and bloom blocks git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1214902 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../hbase/io/hfile/AbstractHFileWriter.java | 2 - .../hadoop/hbase/io/hfile/CacheConfig.java | 14 +++ .../hadoop/hbase/io/hfile/HFileBlock.java | 2 - .../hadoop/hbase/io/hfile/HFileReaderV1.java | 26 ++++-- .../hadoop/hbase/io/hfile/HFileReaderV2.java | 19 ++-- .../hadoop/hbase/io/hfile/HFileWriterV2.java | 1 - .../hadoop/hbase/regionserver/Store.java | 20 +++-- .../hadoop/hbase/regionserver/StoreFile.java | 12 ++- .../metrics/SchemaConfigured.java | 24 +++-- .../regionserver/metrics/SchemaMetrics.java | 10 ++- .../hadoop/hbase/HBaseTestingUtility.java | 22 +++++ .../regionserver/CreateRandomStoreFile.java | 3 +- .../hbase/regionserver/TestHRegion.java | 2 + .../regionserver/TestMultiColumnScanner.java | 87 ++++--------------- .../regionserver/TestScanWithBloomError.java | 26 ++++-- .../regionserver/TestSeekOptimizations.java | 24 +++-- .../metrics/TestSchemaConfigured.java | 34 +++++++- 18 files changed, 200 insertions(+), 129 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 1e9143e88d4..280bf66e9b4 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -745,6 +745,7 @@ Release 0.92.0 - Unreleased HBASE-4805 Allow better control of resource consumption in HTable (Lars H) HBASE-4903 Return a result from RegionObserver.preIncrement (Daniel Gómez Ferro via Lars H) + HBASE-4683 Always cache index and bloom blocks TASKS HBASE-3559 Move report of split to master OFF the heartbeat channel diff --git a/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java b/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java index 8812765414b..4c74738be3a 100644 --- a/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java +++ b/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java @@ -1,6 +1,4 @@ /* - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java b/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java index 6461d53735c..c92cc029ef4 100644 --- a/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java +++ b/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java @@ -25,6 +25,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.util.DirectMemoryUtils; import org.apache.hadoop.util.StringUtils; @@ -207,6 +208,19 @@ public class CacheConfig { return isBlockCacheEnabled() && cacheDataOnRead; } + /** + * Should we cache a block of a particular category? We always cache + * important blocks such as index blocks, as long as the block cache is + * available. 
+ */ + public boolean shouldCacheBlockOnRead(BlockCategory category) { + boolean shouldCache = isBlockCacheEnabled() + && (cacheDataOnRead || + category == BlockCategory.INDEX || + category == BlockCategory.BLOOM); + return shouldCache; + } + /** * @return true if blocks in this file should be flagged as in-memory */ diff --git a/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java index 7fff5705be2..2a04e9b10e8 100644 --- a/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java +++ b/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java @@ -1,6 +1,4 @@ /* - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java b/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java index ba2ff46bb2e..7d63c4f123a 100644 --- a/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java +++ b/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; import org.apache.hadoop.hbase.io.hfile.HFile.Reader; import org.apache.hadoop.hbase.io.hfile.HFile.Writer; +import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.RawComparator; @@ -213,6 +214,12 @@ public class HFileReaderV1 extends AbstractHFileReader { BlockCacheKey cacheKey = HFile.getBlockCacheKey(name, offset); + BlockCategory effectiveCategory = BlockCategory.META; + if (metaBlockName.equals(HFileWriterV1.BLOOM_FILTER_META_KEY) || + metaBlockName.equals(HFileWriterV1.BLOOM_FILTER_DATA_KEY)) { + effectiveCategory = BlockCategory.BLOOM; + } + // Per meta key from any given file, synchronize reads for said block synchronized (metaBlockIndexReader.getRootBlockKey(block)) { metaLoads.incrementAndGet(); @@ -221,10 +228,11 @@ public class HFileReaderV1 extends AbstractHFileReader { if (cacheConf.isBlockCacheEnabled()) { HFileBlock cachedBlock = (HFileBlock) cacheConf.getBlockCache().getBlock(cacheKey, - cacheConf.shouldCacheDataOnRead()); + cacheConf.shouldCacheBlockOnRead(effectiveCategory)); if (cachedBlock != null) { cacheHits.incrementAndGet(); - getSchemaMetrics().updateOnCacheHit(BlockCategory.META, false); + getSchemaMetrics().updateOnCacheHit(effectiveCategory, + SchemaMetrics.NO_COMPACTION); return cachedBlock.getBufferWithoutHeader(); } // Cache Miss, please load. 
@@ -239,10 +247,11 @@ public class HFileReaderV1 extends AbstractHFileReader { long delta = System.nanoTime() - startTimeNs; HFile.preadTimeNano.addAndGet(delta); HFile.preadOps.incrementAndGet(); - getSchemaMetrics().updateOnCacheMiss(BlockCategory.META, false, delta); + getSchemaMetrics().updateOnCacheMiss(effectiveCategory, + SchemaMetrics.NO_COMPACTION, delta); // Cache the block - if (cacheConf.shouldCacheDataOnRead() && cacheBlock) { + if (cacheBlock && cacheConf.shouldCacheBlockOnRead(effectiveCategory)) { cacheConf.getBlockCache().cacheBlock(cacheKey, hfileBlock, cacheConf.isInMemory()); } @@ -288,7 +297,7 @@ public class HFileReaderV1 extends AbstractHFileReader { cacheConf.shouldCacheDataOnRead()); if (cachedBlock != null) { cacheHits.incrementAndGet(); - getSchemaMetrics().updateOnCacheHit(BlockCategory.DATA, + getSchemaMetrics().updateOnCacheHit(cachedBlock.getBlockType().getCategory(), isCompaction); return cachedBlock.getBufferWithoutHeader(); } @@ -327,7 +336,8 @@ public class HFileReaderV1 extends AbstractHFileReader { delta); // Cache the block - if (cacheConf.shouldCacheDataOnRead() && cacheBlock) { + if (cacheBlock && cacheConf.shouldCacheBlockOnRead( + hfileBlock.getBlockType().getCategory())) { cacheConf.getBlockCache().cacheBlock(cacheKey, hfileBlock, cacheConf.isInMemory()); } @@ -455,7 +465,6 @@ public class HFileReaderV1 extends AbstractHFileReader { throw e; } if (blockBuffer.remaining() <= 0) { - // LOG.debug("Fetch next block"); currBlock++; if (currBlock >= reader.getDataBlockIndexReader().getRootBlockCount()) { // damn we are at the end @@ -671,7 +680,8 @@ public class HFileReaderV1 extends AbstractHFileReader { @Override public DataInput getGeneralBloomFilterMetadata() throws IOException { - ByteBuffer buf = getMetaBlock(HFileWriterV1.BLOOM_FILTER_META_KEY, false); + // Always cache Bloom filter blocks. + ByteBuffer buf = getMetaBlock(HFileWriterV1.BLOOM_FILTER_META_KEY, true); if (buf == null) return null; ByteArrayInputStream bais = new ByteArrayInputStream(buf.array(), diff --git a/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java b/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java index 61c049e24ce..59033f42e00 100644 --- a/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java +++ b/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java @@ -232,7 +232,7 @@ public class HFileReaderV2 extends AbstractHFileReader { */ @Override public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, - boolean cacheBlock, boolean pread, final boolean isCompaction) + final boolean cacheBlock, boolean pread, final boolean isCompaction) throws IOException { if (dataBlockIndexReader == null) { throw new IOException("Block index not loaded"); @@ -255,7 +255,6 @@ public class HFileReaderV2 extends AbstractHFileReader { blockLoads.incrementAndGet(); // Check cache for block. If found return. - cacheBlock &= cacheConf.shouldCacheDataOnRead(); if (cacheConf.isBlockCacheEnabled()) { HFileBlock cachedBlock = (HFileBlock) cacheConf.getBlockCache().getBlock(cacheKey, cacheBlock); @@ -275,10 +274,10 @@ public class HFileReaderV2 extends AbstractHFileReader { // Load block from filesystem. 
long startTimeNs = System.nanoTime(); - HFileBlock dataBlock = fsBlockReader.readBlockData(dataBlockOffset, + HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, -1, pread); - passSchemaMetricsTo(dataBlock); - BlockCategory blockCategory = dataBlock.getBlockType().getCategory(); + passSchemaMetricsTo(hfileBlock); + BlockCategory blockCategory = hfileBlock.getBlockType().getCategory(); long delta = System.nanoTime() - startTimeNs; if (pread) { @@ -291,15 +290,17 @@ public class HFileReaderV2 extends AbstractHFileReader { getSchemaMetrics().updateOnCacheMiss(blockCategory, isCompaction, delta); // Cache the block - if (cacheBlock) { - cacheConf.getBlockCache().cacheBlock(cacheKey, dataBlock, + if (cacheBlock && cacheConf.shouldCacheBlockOnRead( + hfileBlock.getBlockType().getCategory())) { + cacheConf.getBlockCache().cacheBlock(cacheKey, hfileBlock, cacheConf.isInMemory()); } - if (dataBlock.getBlockType() == BlockType.DATA) + if (hfileBlock.getBlockType() == BlockType.DATA) { HFile.dataBlockReadCnt.incrementAndGet(); + } - return dataBlock; + return hfileBlock; } finally { offsetLock.releaseLockEntry(lockEntry); } diff --git a/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java b/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java index bc61a3ed4fc..d44a9954067 100644 --- a/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java +++ b/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java @@ -37,7 +37,6 @@ import org.apache.hadoop.hbase.KeyValue.KeyComparator; import org.apache.hadoop.hbase.io.hfile.HFile.Writer; import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; -import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.BloomFilterWriter; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.Writable; diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java b/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java index ade34dd5c96..97c2d46462e 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java @@ -53,9 +53,8 @@ import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.io.hfile.InvalidHFileException; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.regionserver.StoreScanner.ScanType; -import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress; -import org.apache.hadoop.hbase.monitoring.TaskMonitor; import org.apache.hadoop.hbase.regionserver.compactions.CompactSelection; +import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress; import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest; import org.apache.hadoop.hbase.regionserver.metrics.SchemaConfigured; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; @@ -294,6 +293,7 @@ public class Store extends SchemaConfigured implements HeapSize { } StoreFile curfile = new StoreFile(fs, p, this.conf, this.cacheConf, this.family.getBloomFilterType()); + passSchemaMetricsTo(curfile); curfile.createReader(); long length = curfile.getReader().length(); this.storeSize += length; @@ -448,6 +448,7 @@ public class Store extends SchemaConfigured implements HeapSize { StoreFile sf = new StoreFile(fs, dstPath, this.conf, this.cacheConf, this.family.getBloomFilterType()); + passSchemaMetricsTo(sf); 
sf.createReader(); LOG.info("Moved hfile " + srcPath + " into store directory " + @@ -651,6 +652,7 @@ public class Store extends SchemaConfigured implements HeapSize { status.setStatus("Flushing " + this + ": reopening flushed file"); StoreFile sf = new StoreFile(this.fs, dstPath, this.conf, this.cacheConf, this.family.getBloomFilterType()); + passSchemaMetricsTo(sf); StoreFile.Reader r = sf.createReader(); this.storeSize += r.length(); this.totalUncompressedBytes += r.getTotalUncompressedBytes(); @@ -690,13 +692,11 @@ public class Store extends SchemaConfigured implements HeapSize { StoreFile.Writer w = StoreFile.createWriter(fs, region.getTmpDir(), blocksize, compression, comparator, conf, cacheConf, family.getBloomFilterType(), maxKeyCount); - if (w.writer instanceof SchemaConfigured) { - // The store file writer's path does not include the CF name, so we need - // to configure the HFile writer directly. - SchemaConfigured sc = (SchemaConfigured) w.writer; - SchemaConfigured.resetSchemaMetricsConf(sc); - passSchemaMetricsTo(sc); - } + // The store file writer's path does not include the CF name, so we need + // to configure the HFile writer directly. + SchemaConfigured sc = (SchemaConfigured) w.writer; + SchemaConfigured.resetSchemaMetricsConf(sc); + passSchemaMetricsTo(sc); return w; } @@ -1417,6 +1417,7 @@ public class Store extends SchemaConfigured implements HeapSize { try { storeFile = new StoreFile(this.fs, path, this.conf, this.cacheConf, this.family.getBloomFilterType()); + passSchemaMetricsTo(storeFile); storeFile.createReader(); } catch (IOException e) { LOG.error("Failed to open store file : " + path @@ -1468,6 +1469,7 @@ public class Store extends SchemaConfigured implements HeapSize { } result = new StoreFile(this.fs, destPath, this.conf, this.cacheConf, this.family.getBloomFilterType()); + passSchemaMetricsTo(result); result.createReader(); } try { diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java index ff6c17e4481..550b01b1c41 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java @@ -82,7 +82,7 @@ import com.google.common.collect.Ordering; * The reason for this weird pattern where you use a different instance for the * writer and a reader is that we write once but read a lot more. */ -public class StoreFile { +public class StoreFile extends SchemaConfigured { static final Log LOG = LogFactory.getLog(StoreFile.class.getName()); public static enum BloomType { @@ -498,6 +498,11 @@ public class StoreFile { this.reader = new Reader(this.fs, this.path, this.cacheConf); } + if (isSchemaConfigured()) { + SchemaConfigured.resetSchemaMetricsConf(reader); + passSchemaMetricsTo(reader); + } + computeHDFSBlockDistribution(); // Load up indices and fileinfo. This also loads Bloom filter type. 
@@ -1575,6 +1580,11 @@ public class StoreFile { public long getMaxTimestamp() { return timeRangeTracker.maximumTimestamp; } + + @Override + public void schemaConfigurationChanged() { + passSchemaMetricsTo((SchemaConfigured) reader); + } } /** diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaConfigured.java b/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaConfigured.java index d8ac453ecd6..ae45a26d798 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaConfigured.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaConfigured.java @@ -1,6 +1,4 @@ /* - * Copyright The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -115,8 +113,8 @@ public class SchemaConfigured implements HeapSize, SchemaAware { } else { cfName = splits[splits.length - 2]; if (cfName.equals(HRegion.REGION_TEMP_SUBDIR)) { - // This is probably a compaction output file. We will set the real CF - // name later. + // This is probably a compaction or flush output file. We will set + // the real CF name later. cfName = null; } else { cfName = cfName.intern(); @@ -148,8 +146,8 @@ public class SchemaConfigured implements HeapSize, SchemaAware { public SchemaConfigured(Configuration conf, String tableName, String cfName) { this(conf); - this.tableName = tableName.intern(); - this.cfName = cfName.intern(); + this.tableName = tableName != null ? tableName.intern() : tableName; + this.cfName = cfName != null ? cfName.intern() : cfName; } public SchemaConfigured(SchemaAware that) { @@ -187,9 +185,15 @@ public class SchemaConfigured implements HeapSize, SchemaAware { * object. */ public void passSchemaMetricsTo(SchemaConfigured target) { + if (isNull()) { + resetSchemaMetricsConf(target); + return; + } + if (!isSchemaConfigured()) { // Cannot configure another object if we are not configured ourselves. - throw new IllegalStateException("Table name/CF not initialized"); + throw new IllegalStateException("Table name/CF not initialized: " + + schemaConfAsJSON()); } if (conflictingWith(target)) { @@ -198,6 +202,7 @@ public class SchemaConfigured implements HeapSize, SchemaAware { tableName + "\", CF name to \"" + cfName + "\" from " + target.schemaConfAsJSON()); } + target.tableName = tableName.intern(); target.cfName = cfName.intern(); target.schemaMetrics = schemaMetrics; @@ -215,6 +220,7 @@ public class SchemaConfigured implements HeapSize, SchemaAware { target.tableName = null; target.cfName = null; target.schemaMetrics = null; + target.schemaConfigurationChanged(); } @Override @@ -231,6 +237,10 @@ public class SchemaConfigured implements HeapSize, SchemaAware { return tableName != null && cfName != null; } + private boolean isNull() { + return tableName == null && cfName == null && schemaMetrics == null; + } + /** * Determines if the current object's table/CF settings are not in conflict * with the other object's table and CF. 
If the other object's table/CF are diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaMetrics.java b/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaMetrics.java index cef50d43a02..3e587c30c7f 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaMetrics.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaMetrics.java @@ -172,6 +172,9 @@ public class SchemaMetrics { public static final String CF_UNKNOWN_PREFIX = CF_PREFIX + UNKNOWN + "."; public static final String CF_BAD_FAMILY_PREFIX = CF_PREFIX + "__badfamily."; + /** Use for readability when obtaining non-compaction counters */ + public static final boolean NO_COMPACTION = false; + /** * A special schema metric value that means "all tables aggregated" or * "all column families aggregated" when used as a table name or a column @@ -732,7 +735,12 @@ public class SchemaMetrics { m.remove(k); } } - private static Map diffMetrics(Map a, + + /** + * @return the difference between two sets of metrics (second minus first). + * Only includes keys that have nonzero difference. + */ + public static Map diffMetrics(Map a, Map b) { Set allKeys = new TreeSet(a.keySet()); allKeys.addAll(b.keySet()); diff --git a/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index 06b20c627f7..9ea19e58e1c 100644 --- a/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -57,6 +57,7 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.hfile.Compression; +import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegionServer; @@ -64,6 +65,7 @@ import org.apache.hadoop.hbase.regionserver.InternalScanner; import org.apache.hadoop.hbase.regionserver.MultiVersionConsistencyControl; import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; @@ -1874,4 +1876,24 @@ public class HBaseTestingUtility { return hloc.getPort(); } + public HRegion createTestRegion(String tableName, String cfName, + Compression.Algorithm comprAlgo, BloomType bloomType, int maxVersions, + boolean blockCacheEnabled, int blockSize) throws IOException { + HColumnDescriptor hcd = + new HColumnDescriptor(Bytes.toBytes(cfName), maxVersions, + comprAlgo.getName(), + HColumnDescriptor.DEFAULT_IN_MEMORY, + blockCacheEnabled, + HColumnDescriptor.DEFAULT_TTL, + bloomType.toString()); + hcd.setBlocksize(HFile.DEFAULT_BLOCKSIZE); + HTableDescriptor htd = new HTableDescriptor(tableName); + htd.addFamily(hcd); + HRegionInfo info = + new HRegionInfo(Bytes.toBytes(tableName), null, null, false); + HRegion region = + HRegion.createHRegion(info, getDataTestDir(), getConfiguration(), htd); + return region; + } + } diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java b/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java index 42ea29b1f7e..45aed4ded08 100644 --- 
a/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.Compression; +import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.util.BloomFilterFactory; @@ -168,7 +169,7 @@ public class CreateRandomStoreFile { BLOOM_FILTER_OPTION)); } - int blockSize = conf.getInt("hfile.min.blocksize.size", 65536); + int blockSize = HFile.DEFAULT_BLOCKSIZE; if (cmdLine.hasOption(BLOCK_SIZE_OPTION)) blockSize = Integer.valueOf(cmdLine.getOptionValue(BLOCK_SIZE_OPTION)); diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index 362b82dce25..c791bb5551d 100644 --- a/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -66,9 +66,11 @@ import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.filter.NullComparator; import org.apache.hadoop.hbase.filter.PrefixFilter; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; +import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.monitoring.TaskMonitor; import org.apache.hadoop.hbase.regionserver.HRegion.RegionScannerImpl; +import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.regionserver.wal.HLogKey; diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java index b599513a69a..18d8ba75708 100644 --- a/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java @@ -19,7 +19,9 @@ */ package org.apache.hadoop.hbase.regionserver; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.ArrayList; @@ -35,15 +37,17 @@ import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.io.hfile.BlockType; import org.apache.hadoop.hbase.io.hfile.Compression; -import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; +import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; -import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.BlockMetricType; import 
org.apache.hadoop.hbase.util.Bytes; import org.junit.Before; import org.junit.Test; @@ -61,14 +65,13 @@ public class TestMultiColumnScanner { private static final Log LOG = LogFactory.getLog(TestMultiColumnScanner.class); - private static final String TABLE_NAME = "TestMultiColumnScanner"; + private static final String TABLE_NAME = + TestMultiColumnScanner.class.getSimpleName(); + static final int MAX_VERSIONS = 50; - // These fields are used in TestScanWithBloomError - static final String FAMILY = "CF"; - static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY); - - private SchemaMetrics schemaMetrics; + private static final String FAMILY = "CF"; + private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY); /** * The size of the column qualifier set used. Increasing this parameter @@ -107,9 +110,6 @@ public class TestMultiColumnScanner { private Compression.Algorithm comprAlgo; private StoreFile.BloomType bloomType; - private long lastBlocksRead; - private long lastCacheHits; - // Some static sanity-checking. static { assertTrue(BIG_LONG > 0.9 * Long.MAX_VALUE); // Guard against typos. @@ -122,7 +122,6 @@ public class TestMultiColumnScanner { @Before public void setUp() { SchemaMetrics.configureGlobally(TEST_UTIL.getConfiguration()); - schemaMetrics = SchemaMetrics.getInstance(TABLE_NAME, FAMILY); } @@ -137,43 +136,11 @@ public class TestMultiColumnScanner { this.bloomType = bloomType; } - private long getBlocksRead() { - return HRegion.getNumericMetric(schemaMetrics.getBlockMetricName( - BlockType.BlockCategory.ALL_CATEGORIES, false, - BlockMetricType.READ_COUNT)); - } - - private long getCacheHits() { - return HRegion.getNumericMetric(schemaMetrics.getBlockMetricName( - BlockType.BlockCategory.ALL_CATEGORIES, false, - BlockMetricType.CACHE_HIT)); - } - - private void saveBlockStats() { - lastBlocksRead = getBlocksRead(); - lastCacheHits = getCacheHits(); - } - - private void showBlockStats() { - long blocksRead = blocksReadDelta(); - long cacheHits = cacheHitsDelta(); - LOG.info("Compression: " + comprAlgo + ", Bloom type: " - + bloomType + ", blocks read: " + blocksRead + ", block cache hits: " - + cacheHits + ", misses: " + (blocksRead - cacheHits)); - } - - private long cacheHitsDelta() { - return getCacheHits() - lastCacheHits; - } - - private long blocksReadDelta() { - return getBlocksRead() - lastBlocksRead; - } - @Test public void testMultiColumnScanner() throws IOException { - HRegion region = createRegion(TABLE_NAME, comprAlgo, bloomType, - MAX_VERSIONS); + HRegion region = TEST_UTIL.createTestRegion(TABLE_NAME, FAMILY, comprAlgo, + bloomType, MAX_VERSIONS, HColumnDescriptor.DEFAULT_BLOCKCACHE, + HFile.DEFAULT_BLOCKSIZE); List rows = sequentialStrings("row", NUM_ROWS); List qualifiers = sequentialStrings("qual", NUM_COLUMNS); List kvs = new ArrayList(); @@ -311,26 +278,6 @@ public class TestMultiColumnScanner { region.getLog().closeAndDelete(); } - static HRegion createRegion(String tableName, - Compression.Algorithm comprAlgo, BloomType bloomType, int maxVersions) - throws IOException { - HColumnDescriptor hcd = - new HColumnDescriptor(FAMILY_BYTES, maxVersions, - comprAlgo.getName(), - HColumnDescriptor.DEFAULT_IN_MEMORY, - HColumnDescriptor.DEFAULT_BLOCKCACHE, - HColumnDescriptor.DEFAULT_TTL, - bloomType.toString()); - HTableDescriptor htd = new HTableDescriptor(tableName); - htd.addFamily(hcd); - HRegionInfo info = - new HRegionInfo(Bytes.toBytes(tableName), null, null, false); - HRegion region = HRegion.createHRegion( - info, TEST_UTIL.getDataTestDir(), 
TEST_UTIL.getConfiguration(), - htd); - return region; - } - private static String getRowQualStr(KeyValue kv) { String rowStr = Bytes.toString(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength()); diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithBloomError.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithBloomError.java index 3ac759774cc..539ba131dc2 100644 --- a/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithBloomError.java +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithBloomError.java @@ -34,10 +34,15 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.hfile.Compression; +import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFilePrettyPrinter; import org.apache.hadoop.hbase.regionserver.HRegion.RegionScannerImpl; import org.apache.hadoop.hbase.util.Bytes; @@ -48,7 +53,6 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; -import static org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner.*; import static org.junit.Assert.*; /** @@ -63,6 +67,8 @@ public class TestScanWithBloomError { LogFactory.getLog(TestScanWithBloomError.class); private static final String TABLE_NAME = "ScanWithBloomError"; + private static final String FAMILY = "myCF"; + private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY); private static final String ROW = "theRow"; private static final String QUALIFIER_PREFIX = "qual"; private static final byte[] ROW_BYTES = Bytes.toBytes(ROW); @@ -73,7 +79,7 @@ public class TestScanWithBloomError { private Configuration conf; private final static HBaseTestingUtility TEST_UTIL = - new HBaseTestingUtility(); + new HBaseTestingUtility(); @Parameters public static final Collection parameters() { @@ -96,8 +102,10 @@ public class TestScanWithBloomError { @Test public void testThreeStoreFiles() throws IOException { - region = createRegion(TABLE_NAME, Compression.Algorithm.GZ, bloomType, - MAX_VERSIONS); + region = TEST_UTIL.createTestRegion(TABLE_NAME, + FAMILY, Compression.Algorithm.GZ, bloomType, + TestMultiColumnScanner.MAX_VERSIONS, + HColumnDescriptor.DEFAULT_BLOCKCACHE, HFile.DEFAULT_BLOCKSIZE); createStoreFile(new int[] {1, 2, 6}); createStoreFile(new int[] {1, 2, 3, 7}); createStoreFile(new int[] {1, 9}); @@ -178,8 +186,10 @@ public class TestScanWithBloomError { } private void addColumnSetToScan(Scan scan, int[] colIds) { - for (int colId : colIds) - scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualFromId(colId))); + for (int colId : colIds) { + scan.addColumn(FAMILY_BYTES, + Bytes.toBytes(qualFromId(colId))); + } } private String qualFromId(int colId) { @@ -194,7 +204,7 @@ public class TestScanWithBloomError { String qual = qualFromId(colId); allColIds.add(colId); KeyValue kv = KeyValueTestUtil.create(ROW, FAMILY, - qual, ts, createValue(ROW, qual, ts)); + qual, ts, TestMultiColumnScanner.createValue(ROW, qual, ts)); p.add(kv); } region.put(p); diff --git 
a/src/test/java/org/apache/hadoop/hbase/regionserver/TestSeekOptimizations.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestSeekOptimizations.java index 5951f58bcdb..1d7aa33a1dd 100644 --- a/src/test/java/org/apache/hadoop/hbase/regionserver/TestSeekOptimizations.java +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestSeekOptimizations.java @@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.regionserver; import static org.apache.hadoop.hbase.HBaseTestingUtility.assertKVListsEqual; -import static org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner.*; import static org.junit.Assert.assertTrue; import java.io.IOException; @@ -37,11 +36,16 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.hfile.Compression; +import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.util.Bytes; import org.junit.After; import org.junit.Before; @@ -63,6 +67,9 @@ public class TestSeekOptimizations { LogFactory.getLog(TestSeekOptimizations.class); // Constants + private static final String FAMILY = "myCF"; + private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY); + private static final int PUTS_PER_ROW_COL = 50; private static final int DELETES_PER_ROW_COL = 10; @@ -111,6 +118,9 @@ public class TestSeekOptimizations { private StoreFile.BloomType bloomType; private long totalSeekDiligent, totalSeekLazy; + + private final static HBaseTestingUtility TEST_UTIL = + new HBaseTestingUtility(); @Parameters public static final Collection parameters() { @@ -131,9 +141,10 @@ public class TestSeekOptimizations { @Test public void testMultipleTimestampRanges() throws IOException { - region = TestMultiColumnScanner.createRegion( - TestSeekOptimizations.class.getName(), comprAlgo, bloomType, - Integer.MAX_VALUE); + region = TEST_UTIL.createTestRegion( + TestSeekOptimizations.class.getName(), FAMILY, comprAlgo, bloomType, + Integer.MAX_VALUE, HColumnDescriptor.DEFAULT_BLOCKCACHE, + HFile.DEFAULT_BLOCKSIZE); // Delete the given timestamp and everything before. final long latestDelTS = USE_MANY_STORE_FILES ? 
1397 : -1; @@ -412,7 +423,8 @@ public class TestSeekOptimizations { region.delete(del, null, true); } - // Add remaining timestamps (those we have not deleted) to expected results + // Add remaining timestamps (those we have not deleted) to expected + // results for (long ts : putTimestamps) { expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts, KeyValue.Type.Put)); diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/metrics/TestSchemaConfigured.java b/src/test/java/org/apache/hadoop/hbase/regionserver/metrics/TestSchemaConfigured.java index 582693d74e7..2a77d2048fd 100644 --- a/src/test/java/org/apache/hadoop/hbase/regionserver/metrics/TestSchemaConfigured.java +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/metrics/TestSchemaConfigured.java @@ -30,7 +30,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.regionserver.HRegion; -import org.apache.hadoop.hbase.util.ClassSize; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONStringer; import org.junit.Test; @@ -110,12 +109,31 @@ public class TestSchemaConfigured { } @Test(expected=IllegalStateException.class) - public void testConfigureWithUnconfigured() { - SchemaConfigured unconfigured = new SchemaConfigured(); + public void testConfigureWithUnconfigured1() { + SchemaConfigured unconfigured = new SchemaConfigured(null, "t1", null); SchemaConfigured target = new SchemaConfigured(); unconfigured.passSchemaMetricsTo(target); } + @Test(expected=IllegalStateException.class) + public void testConfigureWithUnconfigured2() { + SchemaConfigured unconfigured = new SchemaConfigured(null, null, "cf1"); + SchemaConfigured target = new SchemaConfigured(); + unconfigured.passSchemaMetricsTo(target); + } + + /** + * Configuring with an uninitialized object is equivalent to re-setting + * schema metrics configuration. + */ + public void testConfigureWithNull() { + SchemaConfigured unconfigured = new SchemaConfigured(); + SchemaConfigured target = new SchemaConfigured(null, "t1", "cf1"); + unconfigured.passSchemaMetricsTo(target); + assertTrue(target.getTableName() == null); + assertTrue(target.getColumnFamilyName() == null); + } + public void testConfigurePartiallyDefined() { final SchemaConfigured sc = new SchemaConfigured(null, "t1", "cf1"); final SchemaConfigured target1 = new SchemaConfigured(null, "t2", null); @@ -138,7 +156,15 @@ public class TestSchemaConfigured { public void testConflictingConf() { SchemaConfigured sc = new SchemaConfigured(null, "t1", "cf1"); SchemaConfigured target = new SchemaConfigured(null, "t2", "cf1"); - target.passSchemaMetricsTo(sc); + sc.passSchemaMetricsTo(target); + } + + /** We allow setting CF to unknown and then reconfiguring it */ + public void testReconfigureUnknownCF() { + SchemaConfigured sc = new SchemaConfigured(null, "t1", "cf1"); + SchemaConfigured target = + new SchemaConfigured(null, "t1", SchemaMetrics.UNKNOWN); + sc.passSchemaMetricsTo(target); } /**
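
For context, the core behavioral change in this patch is that index and Bloom-filter blocks are cached on read whenever a block cache exists, even if the column family has data-block caching on read disabled. The following is a minimal, standalone Java sketch of that decision rule; the enum and method here are simplified stand-ins mirroring the patched CacheConfig.shouldCacheBlockOnRead(BlockCategory), not the real org.apache.hadoop.hbase.io.hfile classes.

// Illustrative sketch only -- simplified stand-in for the patched
// CacheConfig.shouldCacheBlockOnRead(BlockCategory) logic.
public class CacheDecisionSketch {

  // Simplified analogue of BlockType.BlockCategory (hypothetical subset).
  enum BlockCategory { DATA, META, INDEX, BLOOM }

  static final boolean blockCacheEnabled = true;  // assume a block cache is configured
  static final boolean cacheDataOnRead = false;   // CF-level "cache blocks on read" turned off

  // Mirrors the patched rule: index and Bloom blocks are always cached
  // when the block cache is available, regardless of cacheDataOnRead.
  static boolean shouldCacheBlockOnRead(BlockCategory category) {
    return blockCacheEnabled
        && (cacheDataOnRead
            || category == BlockCategory.INDEX
            || category == BlockCategory.BLOOM);
  }

  public static void main(String[] args) {
    for (BlockCategory c : BlockCategory.values()) {
      System.out.println(c + " -> cache on read: " + shouldCacheBlockOnRead(c));
    }
    // With cacheDataOnRead = false this prints:
    //   DATA -> cache on read: false
    //   META -> cache on read: false
    //   INDEX -> cache on read: true
    //   BLOOM -> cache on read: true
  }
}

This matches why the readers in the patch now pass the block category (or an "effective" category for v1 meta blocks holding Bloom data) into the caching decision instead of consulting shouldCacheDataOnRead() alone.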